granicus.if.org Git - icu/commitdiff
ICU-12410 add ucasemap_toTitleWithEdits(const UCaseMap *csm, icu::BreakIterator ...
author Markus Scherer <markus.icu@gmail.com>
Mon, 9 Jan 2017 23:52:12 +0000 (23:52 +0000)
committer Markus Scherer <markus.icu@gmail.com>
Mon, 9 Jan 2017 23:52:12 +0000 (23:52 +0000)
X-SVN-Rev: 39551

icu4c/source/common/ucasemap_titlecase_brkiter.cpp
icu4c/source/common/unicode/ucasemap.h
icu4c/source/common/unicode/unistr.h
icu4c/source/common/unistr_case.cpp
icu4c/source/common/unistr_case_locale.cpp
icu4c/source/common/unistr_titlecase_brkiter.cpp
icu4c/source/common/ustr_imp.h
icu4c/source/common/ustr_titlecase_brkiter.cpp
icu4c/source/common/ustrcase.cpp
icu4c/source/common/ustrcase_locale.cpp

index ab61e21765b7aa7641cb2037dacaab3b70e4a228..4b607114171fb828fcc580e99111b0eb2e3c23b6 100644 (file)
@@ -32,14 +32,13 @@ U_NAMESPACE_USE
 
 U_CAPI const UBreakIterator * U_EXPORT2
 ucasemap_getBreakIterator(const UCaseMap *csm) {
-    return csm->iter;
+    return reinterpret_cast<UBreakIterator *>(csm->iter);
 }
 
 U_CAPI void U_EXPORT2
 ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode * /*pErrorCode*/) {
-    // Do not call ubrk_close() so that we do not depend on all of the BreakIterator code.
-    delete reinterpret_cast<BreakIterator *>(csm->iter);
-    csm->iter=iterToAdopt;
+    delete csm->iter;
+    csm->iter=reinterpret_cast<BreakIterator *>(iterToAdopt);
 }
 
 U_CAPI int32_t U_EXPORT2
@@ -49,15 +48,13 @@ ucasemap_utf8ToTitle(UCaseMap *csm,
                      UErrorCode *pErrorCode) {
     UText utext=UTEXT_INITIALIZER;
     utext_openUTF8(&utext, (const char *)src, srcLength, pErrorCode);
+    if(csm->iter==NULL) {
+        csm->iter=BreakIterator::createWordInstance(Locale(csm->locale), *pErrorCode);
+    }
     if(U_FAILURE(*pErrorCode)) {
         return 0;
     }
-    if(csm->iter==NULL) {
-        csm->iter=ubrk_open(UBRK_WORD, csm->locale,
-                            NULL, 0,
-                            pErrorCode);
-    }
-    ubrk_setUText(csm->iter, &utext, pErrorCode);
+    csm->iter->setText(&utext, *pErrorCode);
     int32_t length=ucasemap_mapUTF8(csm,
                    (uint8_t *)dest, destCapacity,
                    (const uint8_t *)src, srcLength,
index 2bff9ee1404024d9704860739f9cb1dc6cbcb9f2..6f00b072e3a444e0387b355c2c79ab438c9ec53b 100644 (file)
 #define __UCASEMAP_H__
 
 #include "unicode/utypes.h"
-#include "unicode/ustring.h"
 #include "unicode/localpointer.h"
 
 #if U_SHOW_CPLUSPLUS_API
-
 #include "unicode/uobject.h"
-
 #endif  // U_SHOW_CPLUSPLUS_API
 
+#include "unicode/ustring.h"
+
 /**
  * \file
  * \brief C API: Unicode case mapping functions using a UCaseMap service object.
@@ -89,6 +88,8 @@ ucasemap_close(UCaseMap *csm);
 
 U_NAMESPACE_BEGIN
 
+class BreakIterator;
+
 /**
  * \class LocalUCaseMapPointer
  * "Smart pointer" class, closes a UCaseMap via ucasemap_close().
@@ -107,6 +108,9 @@ U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close);
  * Supports replacements, insertions, deletions in linear progression.
  * Does not support moving/reordering of text.
  *
+ * An Edits object tracks a separate UErrorCode, but ICU case mapping functions
+ * merge any such errors into their API's UErrorCode.
+ *
  * @internal ICU 59 technology preview
  */
 class Edits final : public UMemory {
@@ -154,21 +158,24 @@ public:
 
     /**
      * Adds a record for an unchanged segment of text.
+     * Normally called from inside ICU case mapping functions, not user code.
      * @internal ICU 59 technology preview
      */
     void addUnchanged(int32_t unchangedLength);
     /**
      * Adds a record for a text replacement/insertion/deletion.
+     * Normally called from inside ICU case mapping functions, not user code.
      * @internal ICU 59 technology preview
      */
     void addReplace(int32_t oldLength, int32_t newLength);
     /**
      * Sets the UErrorCode if an error occurred while recording edits.
      * Preserves older error codes in the outErrorCode.
+     * Normally called from inside ICU case mapping functions, not user code.
      * @return TRUE if U_FAILURE(outErrorCode)
      * @internal ICU 59 technology preview
      */
-    UBool setErrorCode(UErrorCode &outErrorCode);
+    UBool copyErrorTo(UErrorCode &outErrorCode);
 
     /**
      * How much longer is the new text compared with the old text?
@@ -463,7 +470,7 @@ ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
  * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
  * @param edits     Records edits for index mapping, working with styled text,
  *                  and getting only changes (if any). Can be NULL.
- * @param pErrorCode Must be a valid pointer to an error code value,
+ * @param errorCode Reference to an in/out error code value
  *                  which must not indicate a failure before the function call.
  * @return The length of the result string, if successful - or in case of a buffer overflow,
  *         in which case it will be greater than destCapacity.
@@ -476,7 +483,7 @@ ucasemap_toLowerWithEdits(const UCaseMap *csm,
                           UChar *dest, int32_t destCapacity,
                           const UChar *src, int32_t srcLength,
                           icu::Edits *edits,
-                          UErrorCode *pErrorCode);
+                          UErrorCode &errorCode);
 
 /**
  * Uppercases the characters in a UTF-16 string and optionally records edits.
@@ -495,7 +502,7 @@ ucasemap_toLowerWithEdits(const UCaseMap *csm,
  * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
  * @param edits     Records edits for index mapping, working with styled text,
  *                  and getting only changes (if any). Can be NULL.
- * @param pErrorCode Must be a valid pointer to an error code value,
+ * @param errorCode Reference to an in/out error code value
  *                  which must not indicate a failure before the function call.
  * @return The length of the result string, if successful - or in case of a buffer overflow,
  *         in which case it will be greater than destCapacity.
@@ -508,7 +515,99 @@ ucasemap_toUpperWithEdits(const UCaseMap *csm,
                           UChar *dest, int32_t destCapacity,
                           const UChar *src, int32_t srcLength,
                           icu::Edits *edits,
-                          UErrorCode *pErrorCode);
+                          UErrorCode &errorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Titlecases a UTF-16 string and optionally records edits.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with ucasemap_setOptions().)
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * @param csm       UCaseMap service object.
+ * @param iter      A break iterator to find the first characters of words that are to be titlecased.
+ *                  It is set to the source string and used one or more times for iteration.
+ *                  If NULL, then a clone of ucasemap_getBreakIterator() is used.
+ *                  If that is NULL too, then a word break iterator for the locale is used
+ *                  (or something equivalent).
+ * @param dest      A buffer for the result string. The result will be NUL-terminated if
+ *                  the buffer is large enough.
+ *                  The contents are undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ *                  dest may be NULL and the function will only return the length of the result
+ *                  without writing any of the result string.
+ * @param src       The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param edits     Records edits for index mapping, working with styled text,
+ *                  and getting only changes (if any). Can be NULL.
+ * @param errorCode Reference to an in/out error code value
+ *                  which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ *         in which case it will be greater than destCapacity.
+ *
+ * @see u_strToTitle
+ * @internal ICU 59 technology preview
+ */
+U_CAPI int32_t U_EXPORT2
+ucasemap_toTitleWithEdits(const UCaseMap *csm, icu::BreakIterator *iter,
+                          UChar *dest, int32_t destCapacity,
+                          const UChar *src, int32_t srcLength,
+                          icu::Edits *edits,
+                          UErrorCode &errorCode);
+
+#endif  // UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Case-folds the characters in a UTF-16 string and optionally records edits.
+ *
+ * Case-folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'T' in CaseFolding.txt.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm       UCaseMap service object.
+ * @param dest      A buffer for the result string. The result will be NUL-terminated if
+ *                  the buffer is large enough.
+ *                  The contents are undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ *                  dest may be NULL and the function will only return the length of the result
+ *                  without writing any of the result string.
+ * @param src       The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param edits     Records edits for index mapping, working with styled text,
+ *                  and getting only changes (if any). Can be NULL.
+ * @param errorCode Reference to an in/out error code value
+ *                  which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ *         in which case it will be greater than destCapacity.
+ *
+ * @see u_strFoldCase
+ * @see ucasemap_setOptions
+ * @see U_FOLD_CASE_DEFAULT
+ * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @internal ICU 59 technology preview
+ */
+U_CAPI int32_t U_EXPORT2
+ucasemap_foldCaseWithEdits(const UCaseMap *csm,
+                           UChar *dest, int32_t destCapacity,
+                           const UChar *src, int32_t srcLength,
+                           icu::Edits *edits,
+                           UErrorCode &errorCode);
 
 #endif  // U_HIDE_INTERNAL_API
 #endif  // U_SHOW_CPLUSPLUS_API
@@ -600,7 +699,7 @@ ucasemap_toTitle(UCaseMap *csm,
                  const UChar *src, int32_t srcLength,
                  UErrorCode *pErrorCode);
 
-#endif
+#endif  // UCONFIG_NO_BREAK_ITERATION
 
 /**
  * Lowercase the characters in a UTF-8 string.
@@ -762,10 +861,13 @@ ucasemap_utf8FoldCase(const UCaseMap *csm,
  */
 typedef int32_t U_CALLCONV
 UStringCaseMapper(const UCaseMap *csm,
+#if !UCONFIG_NO_BREAK_ITERATION
+                  icu::BreakIterator *iter,
+#endif
                   UChar *dest, int32_t destCapacity,
                   const UChar *src, int32_t srcLength,
                   icu::Edits *edits,
-                  UErrorCode *pErrorCode);
+                  UErrorCode &errorCode);
 
 #endif  // U_SHOW_CPLUSPLUS_API
 #endif
index 4b677b534f9cfcf042b74ea8d557012d3230d756..350828559da7976016a787fb2fcbd29be7f06766 100644 (file)
@@ -3573,7 +3573,11 @@ private:
    * as in ustr_imp.h for ustrcase_map().
    */
   UnicodeString &
-  caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
+  caseMap(const UCaseMap *csm,
+#if !UCONFIG_NO_BREAK_ITERATION
+          BreakIterator *iter,
+#endif
+          UStringCaseMapper *stringCaseMapper);
 
   // ref counting
   void addRef(void);
index e9771d1ded505580b151f72ce9fd543fc947294b..d48831b4b10cd45d815563f4642cdb4dc7ec49c7 100644 (file)
@@ -88,7 +88,7 @@ UnicodeString::doCaseCompare(int32_t start,
 //========================================
 
 UnicodeString &
-UnicodeString::caseMap(const UCaseMap *csm,
+UnicodeString::caseMap(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
                        UStringCaseMapper *stringCaseMapper) {
   if(isEmpty() || !isWritable()) {
     // nothing to do
@@ -121,7 +121,9 @@ UnicodeString::caseMap(const UCaseMap *csm,
       buffer = fUnion.fStackFields.fBuffer;
       capacity = US_STACKBUF_SIZE;
     }
-    newLength = stringCaseMapper(csm, buffer, capacity, oldArray, oldLength, NULL, &errorCode);
+    newLength = stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR
+                                 buffer, capacity,
+                                 oldArray, oldLength, NULL, errorCode);
     if (U_SUCCESS(errorCode)) {
       setLength(newLength);
       return *this;
@@ -140,22 +142,18 @@ UnicodeString::caseMap(const UCaseMap *csm,
     Edits edits;
     edits.setWriteUnchanged(FALSE);
     UChar replacementChars[200];
-    stringCaseMapper(csm, replacementChars, UPRV_LENGTHOF(replacementChars),
-                     oldArray, oldLength, &edits, &errorCode);
-    UErrorCode editsError = U_ZERO_ERROR;
-    if (edits.setErrorCode(editsError)) {
-      setToBogus();
-      return *this;
-    }
-    newLength = oldLength + edits.lengthDelta();
+    stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR
+                     replacementChars, UPRV_LENGTHOF(replacementChars),
+                     oldArray, oldLength, &edits, errorCode);
     if (U_SUCCESS(errorCode)) {
       // Grow the buffer at most once, not for multiple doReplace() calls.
+      newLength = oldLength + edits.lengthDelta();
       if (newLength > oldLength && !cloneArrayIfNeeded(newLength, newLength)) {
         return *this;
       }
-      for (Edits::Iterator iter = edits.getCoarseChangesIterator(); iter.next(errorCode);) {
-        doReplace(iter.destinationIndex(), iter.oldLength(),
-                  replacementChars, iter.replacementIndex(), iter.newLength());
+      for (Edits::Iterator ei = edits.getCoarseChangesIterator(); ei.next(errorCode);) {
+        doReplace(ei.destinationIndex(), ei.oldLength(),
+                  replacementChars, ei.replacementIndex(), ei.newLength());
       }
       if (U_FAILURE(errorCode)) {
         setToBogus();
@@ -163,6 +161,7 @@ UnicodeString::caseMap(const UCaseMap *csm,
       return *this;
     } else if (errorCode == U_BUFFER_OVERFLOW_ERROR) {
       // common overflow handling below
+      newLength = oldLength + edits.lengthDelta();
     } else {
       setToBogus();
       return *this;
@@ -179,8 +178,9 @@ UnicodeString::caseMap(const UCaseMap *csm,
     return *this;
   }
   errorCode = U_ZERO_ERROR;
-  newLength = stringCaseMapper(csm, getArrayStart(), getCapacity(),
-                               oldArray, oldLength, NULL, &errorCode);
+  newLength = stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR
+                               getArrayStart(), getCapacity(),
+                               oldArray, oldLength, NULL, errorCode);
   if (bufferToDelete) {
     uprv_free(bufferToDelete);
   }
@@ -197,7 +197,7 @@ UnicodeString::foldCase(uint32_t options) {
   UCaseMap csm=UCASEMAP_INITIALIZER;
   csm.csp=ucase_getSingleton();
   csm.options=options;
-  return caseMap(&csm, ustrcase_internalFold);
+  return caseMap(&csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalFold);
 }
 
 U_NAMESPACE_END
index a01be5c30b372a54e9f3321e08117f0a63bbaf3b..4bf55777043f8616534be19b12d7b7ef54f6ac39 100644 (file)
@@ -54,7 +54,7 @@ UnicodeString &
 UnicodeString::toLower(const Locale &locale) {
   UCaseMap csm=UCASEMAP_INITIALIZER;
   setTempCaseMap(&csm, locale.getName());
-  return caseMap(&csm, ustrcase_internalToLower);
+  return caseMap(&csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToLower);
 }
 
 UnicodeString &
@@ -66,7 +66,7 @@ UnicodeString &
 UnicodeString::toUpper(const Locale &locale) {
   UCaseMap csm=UCASEMAP_INITIALIZER;
   setTempCaseMap(&csm, locale.getName());
-  return caseMap(&csm, ustrcase_internalToUpper);
+  return caseMap(&csm, UCASEMAP_BREAK_ITERATOR_NULL ustrcase_internalToUpper);
 }
 
 U_NAMESPACE_END
index c909133cdbe1346bc64f32e08dcb3d2284dee77d..05d38fb582259440d2cbad72d34928556215f0f8 100644 (file)
 #if !UCONFIG_NO_BREAK_ITERATION
 
 #include "unicode/brkiter.h"
-#include "unicode/ubrk.h"
 #include "unicode/unistr.h"
 #include "unicode/ustring.h"
 #include "cmemory.h"
 #include "ustr_imp.h"
 
-static int32_t U_CALLCONV
-unistr_case_internalToTitle(const UCaseMap *csm,
-                            UChar *dest, int32_t destCapacity,
-                            const UChar *src, int32_t srcLength,
-                            icu::Edits *edits,
-                            UErrorCode *pErrorCode) {
-  ubrk_setText(csm->iter, src, srcLength, pErrorCode);
-  return ustrcase_internalToTitle(csm, dest, destCapacity, src, srcLength, edits, pErrorCode);
-}
-
 /*
  * Set parameters on an empty UCaseMap, for UCaseMap-less API functions.
  * Do this fast because it is called with every function call.
@@ -80,8 +69,8 @@ UnicodeString::toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t
       return *this;
     }
   }
-  csm.iter=reinterpret_cast<UBreakIterator *>(bi);
-  caseMap(&csm, unistr_case_internalToTitle);
+  bi->setText(*this);
+  caseMap(&csm, bi, ustrcase_internalToTitle);
   if(titleIter==NULL) {
     delete bi;
   }
index 28bdca1e001272ca9f80ad63c9c7b6b5884c5032..52a44545a0ffcd06c5032b8ac95c5eb7d4bfc2df 100644 (file)
@@ -114,7 +114,7 @@ uprv_loadPropsData(UErrorCode *errorCode);*/
 struct UCaseMap {
     const UCaseProps *csp;
 #if !UCONFIG_NO_BREAK_ITERATION
-    UBreakIterator *iter;  /* We adopt the iterator, so we own it. */
+    icu::BreakIterator *iter;  /* We adopt the iterator, so we own it. */
 #endif
     char locale[32];
     int32_t locCache;
@@ -123,8 +123,16 @@ struct UCaseMap {
 
 #if UCONFIG_NO_BREAK_ITERATION
 #   define UCASEMAP_INITIALIZER { NULL, { 0 }, 0, 0 }
+#   define UCASEMAP_BREAK_ITERATOR_PARAM
+#   define UCASEMAP_BREAK_ITERATOR_UNUSED
+#   define UCASEMAP_BREAK_ITERATOR
+#   define UCASEMAP_BREAK_ITERATOR_NULL
 #else
 #   define UCASEMAP_INITIALIZER { NULL, NULL, { 0 }, 0, 0 }
+#   define UCASEMAP_BREAK_ITERATOR_PARAM icu::BreakIterator *iter,
+#   define UCASEMAP_BREAK_ITERATOR_UNUSED icu::BreakIterator *,
+#   define UCASEMAP_BREAK_ITERATOR iter,
+#   define UCASEMAP_BREAK_ITERATOR_NULL NULL,
 #endif
 
 U_CFUNC void
@@ -132,51 +140,52 @@ ustrcase_setTempCaseMapLocale(UCaseMap *csm, const char *locale);
 
 /** Implements UStringCaseMapper. */
 U_CFUNC int32_t U_CALLCONV
-ustrcase_internalToLower(const UCaseMap *csm,
+ustrcase_internalToLower(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
                          UChar *dest, int32_t destCapacity,
                          const UChar *src, int32_t srcLength,
                          icu::Edits *edits,
-                         UErrorCode *pErrorCode);
+                         UErrorCode &errorCode);
 
 /** Implements UStringCaseMapper. */
 U_CFUNC int32_t U_CALLCONV
-ustrcase_internalToUpper(const UCaseMap *csm,
+ustrcase_internalToUpper(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
                          UChar *dest, int32_t destCapacity,
                          const UChar *src, int32_t srcLength,
                          icu::Edits *edits,
-                         UErrorCode *pErrorCode);
+                         UErrorCode &errorCode);
 
 #if !UCONFIG_NO_BREAK_ITERATION
 
 /** Implements UStringCaseMapper. */
 U_CFUNC int32_t U_CALLCONV
 ustrcase_internalToTitle(const UCaseMap *csm,
+                         icu::BreakIterator *iter,
                          UChar *dest, int32_t destCapacity,
                          const UChar *src, int32_t srcLength,
                          icu::Edits *edits,
-                         UErrorCode *pErrorCode);
+                         UErrorCode &errorCode);
 
 #endif
 
 /** Implements UStringCaseMapper. */
 U_CFUNC int32_t U_CALLCONV
-ustrcase_internalFold(const UCaseMap *csm,
+ustrcase_internalFold(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
                       UChar *dest, int32_t destCapacity,
                       const UChar *src, int32_t srcLength,
                       icu::Edits *edits,
-                      UErrorCode *pErrorCode);
+                      UErrorCode &errorCode);
 
 /**
  * Common string case mapping implementation for ucasemap_toXyz() and UnicodeString::toXyz().
  * Implements argument checking.
  */
 U_CFUNC int32_t
-ustrcase_map(const UCaseMap *csm,
+ustrcase_map(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
              UChar *dest, int32_t destCapacity,
              const UChar *src, int32_t srcLength,
              UStringCaseMapper *stringCaseMapper,
              icu::Edits *edits,
-             UErrorCode *pErrorCode);
+             UErrorCode &errorCode);
 
 /**
  * Common string case mapping implementation for old-fashioned u_strToXyz() functions
@@ -184,11 +193,11 @@ ustrcase_map(const UCaseMap *csm,
  * Implements argument checking and internally works with an intermediate buffer if necessary.
  */
 U_CFUNC int32_t
-ustrcase_mapWithOverlap(const UCaseMap *csm,
+ustrcase_mapWithOverlap(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
                         UChar *dest, int32_t destCapacity,
                         const UChar *src, int32_t srcLength,
                         UStringCaseMapper *stringCaseMapper,
-                        UErrorCode *pErrorCode);
+                        UErrorCode &errorCode);
 
 /**
  * UTF-8 string case mapping function type, used by ucasemap_mapUTF8().
index 463ba2c546fa65bc0ffe8f61c52daffda3c2a567..bf4f252c5416203509689dcb42c307d01fe68018 100644 (file)
@@ -22,6 +22,7 @@
 #if !UCONFIG_NO_BREAK_ITERATION
 
 #include "unicode/brkiter.h"
+#include "unicode/localpointer.h"
 #include "unicode/ubrk.h"
 #include "unicode/ucasemap.h"
 #include "cmemory.h"
@@ -57,20 +58,51 @@ u_strToTitle(UChar *dest, int32_t destCapacity,
              UErrorCode *pErrorCode) {
     UCaseMap csm=UCASEMAP_INITIALIZER;
     setTempCaseMap(&csm, locale);
+    icu::LocalPointer<icu::BreakIterator> ownedIter;
+    icu::BreakIterator *iter;
     if(titleIter!=NULL) {
-        ubrk_setText(csm.iter=titleIter, src, srcLength, pErrorCode);
+        iter=reinterpret_cast<icu::BreakIterator *>(titleIter);
     } else {
-        csm.iter=ubrk_open(UBRK_WORD, csm.locale, src, srcLength, pErrorCode);
+        iter=icu::BreakIterator::createWordInstance(icu::Locale(csm.locale), *pErrorCode);
+        ownedIter.adoptInstead(iter);
     }
-    int32_t length=ustrcase_mapWithOverlap(
-        &csm,
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
+    }
+    icu::UnicodeString s(srcLength<0, src, srcLength);
+    iter->setText(s);
+    return ustrcase_mapWithOverlap(
+        &csm, iter,
         dest, destCapacity,
         src, srcLength,
-        ustrcase_internalToTitle, pErrorCode);
-    if(titleIter==NULL && csm.iter!=NULL) {
-        ubrk_close(csm.iter);
+        ustrcase_internalToTitle, *pErrorCode);
+}
+
+U_CAPI int32_t U_EXPORT2
+ucasemap_toTitleWithEdits(const UCaseMap *csm, icu::BreakIterator *iter,
+                          UChar *dest, int32_t destCapacity,
+                          const UChar *src, int32_t srcLength,
+                          icu::Edits *edits,
+                          UErrorCode &errorCode) {
+    icu::LocalPointer<icu::BreakIterator> ownedIter;
+    if(iter==NULL) {
+        if(csm->iter!=NULL) {
+            iter=csm->iter->clone();
+        } else {
+            iter=icu::BreakIterator::createWordInstance(icu::Locale(csm->locale), errorCode);
+        }
+        ownedIter.adoptInsteadAndCheckErrorCode(iter, errorCode);
+    }
+    if(U_FAILURE(errorCode)) {
+        return 0;
     }
-    return length;
+    icu::UnicodeString s(srcLength<0, src, srcLength);
+    iter->setText(s);
+    return ustrcase_map(
+        csm, iter,
+        dest, destCapacity,
+        src, srcLength,
+        ustrcase_internalToTitle, edits, errorCode);
 }
 
 U_CAPI int32_t U_EXPORT2
@@ -78,16 +110,19 @@ ucasemap_toTitle(UCaseMap *csm,
                  UChar *dest, int32_t destCapacity,
                  const UChar *src, int32_t srcLength,
                  UErrorCode *pErrorCode) {
-    if(csm->iter!=NULL) {
-        ubrk_setText(csm->iter, src, srcLength, pErrorCode);
-    } else {
-        csm->iter=ubrk_open(UBRK_WORD, csm->locale, src, srcLength, pErrorCode);
+    if(csm->iter==NULL) {
+        csm->iter=icu::BreakIterator::createWordInstance(icu::Locale(csm->locale), *pErrorCode);
+    }
+    if(U_FAILURE(*pErrorCode)) {
+        return 0;
     }
+    icu::UnicodeString s(srcLength<0, src, srcLength);
+    csm->iter->setText(s);
     return ustrcase_map(
-        csm,
+        csm, csm->iter,
         dest, destCapacity,
         src, srcLength,
-        ustrcase_internalToTitle, NULL, pErrorCode);
+        ustrcase_internalToTitle, NULL, *pErrorCode);
 }
 
 #endif  // !UCONFIG_NO_BREAK_ITERATION
index 35b5d6370dab40295307107019d7a8844fb2893c..c437683c6f4af202ed903c2224251a45a2cd221e 100644 (file)
@@ -207,7 +207,7 @@ UBool Edits::growArray() {
     return TRUE;
 }
 
-UBool Edits::setErrorCode(UErrorCode &outErrorCode) {
+UBool Edits::copyErrorTo(UErrorCode &outErrorCode) {
     if (U_FAILURE(outErrorCode)) { return TRUE; }
     if (U_SUCCESS(errorCode)) { return FALSE; }
     outErrorCode = errorCode;
@@ -374,6 +374,22 @@ UBool Edits::Iterator::findSourceIndex(int32_t i, UErrorCode &errorCode) {
     return FALSE;
 }
 
+namespace {
+
+int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity,
+                                   Edits *edits, UErrorCode &errorCode) {
+    if (U_SUCCESS(errorCode)) {
+        if (destIndex > destCapacity) {
+            errorCode = U_BUFFER_OVERFLOW_ERROR;
+        } else if (edits != NULL) {
+            edits->copyErrorTo(errorCode);
+        }
+    }
+    return destIndex;
+}
+
+}  // namespace
+
 U_NAMESPACE_END
 
 U_NAMESPACE_USE
@@ -527,7 +543,7 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map,
          const UChar *src, UCaseContext *csc,
          int32_t srcStart, int32_t srcLimit,
          icu::Edits *edits,
-         UErrorCode *pErrorCode) {
+         UErrorCode &errorCode) {
     int32_t locCache=csm->locCache;
 
     /* case mapping loop */
@@ -544,33 +560,26 @@ _caseMap(const UCaseMap *csm, UCaseMapFull *map,
         destIndex = appendResult(dest, destIndex, destCapacity, c, s,
                                  srcIndex - cpStart, edits);
         if (destIndex < 0) {
-            *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
             return 0;
         }
     }
 
-    if(destIndex>destCapacity) {
-        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
-    }
     return destIndex;
 }
 
 #if !UCONFIG_NO_BREAK_ITERATION
 
 U_CFUNC int32_t U_CALLCONV
-ustrcase_internalToTitle(const UCaseMap *csm,
+ustrcase_internalToTitle(const UCaseMap *csm, BreakIterator *iter,
                          UChar *dest, int32_t destCapacity,
                          const UChar *src, int32_t srcLength,
                          icu::Edits *edits,
-                         UErrorCode *pErrorCode) {
-    if(U_FAILURE(*pErrorCode)) {
+                         UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
         return 0;
     }
 
-    // Use the C++ abstract base class to minimize dependencies.
-    // TODO: Change UCaseMap.iter to store a BreakIterator directly.
-    BreakIterator *bi=reinterpret_cast<BreakIterator *>(csm->iter);
-
     /* set up local variables */
     int32_t locCache=csm->locCache;
     UCaseContext csc=UCASECONTEXT_INITIALIZER;
@@ -586,9 +595,9 @@ ustrcase_internalToTitle(const UCaseMap *csm,
         int32_t idx;
         if(isFirstIndex) {
             isFirstIndex=FALSE;
-            idx=bi->first();
+            idx=iter->first();
         } else {
-            idx=bi->next();
+            idx=iter->next();
         }
         if(idx==UBRK_DONE || idx>srcLength) {
             idx=srcLength;
@@ -632,7 +641,7 @@ ustrcase_internalToTitle(const UCaseMap *csm,
                 destIndex=appendUnchanged(dest, destIndex, destCapacity,
                                           src+prev, titleStart-prev, edits);
                 if(destIndex<0) {
-                    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+                    errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                     return 0;
                 }
             }
@@ -647,7 +656,7 @@ ustrcase_internalToTitle(const UCaseMap *csm,
                 destIndex=appendResult(dest, destIndex, destCapacity, c, s,
                                        titleLimit-titleStart, edits);
                 if(destIndex<0) {
-                    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+                    errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                     return 0;
                 }
 
@@ -658,7 +667,7 @@ ustrcase_internalToTitle(const UCaseMap *csm,
                         (src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) {
                     destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A);
                     if(destIndex<0) {
-                        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+                        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                         return 0;
                     }
                     if(edits!=NULL) {
@@ -677,11 +686,11 @@ ustrcase_internalToTitle(const UCaseMap *csm,
                                 dest+destIndex, destCapacity-destIndex,
                                 src, &csc,
                                 titleLimit, idx,
-                                edits, pErrorCode);
-                        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
-                            *pErrorCode=U_ZERO_ERROR;
+                                edits, errorCode);
+                        if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
+                            errorCode=U_ZERO_ERROR;
                         }
-                        if(U_FAILURE(*pErrorCode)) {
+                        if(U_FAILURE(errorCode)) {
                             return destIndex;
                         }
                     } else {
@@ -689,7 +698,7 @@ ustrcase_internalToTitle(const UCaseMap *csm,
                         destIndex=appendUnchanged(dest, destIndex, destCapacity,
                                                   src+titleLimit, idx-titleLimit, edits);
                         if(destIndex<0) {
-                            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+                            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                             return 0;
                         }
                     }
@@ -700,10 +709,7 @@ ustrcase_internalToTitle(const UCaseMap *csm,
         prev=idx;
     }
 
-    if(destIndex>destCapacity) {
-        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
-    }
-    return destIndex;
+    return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
 }
 
 #endif  // !UCONFIG_NO_BREAK_ITERATION
@@ -1191,7 +1197,7 @@ int32_t toUpper(const UCaseMap *csm,
                 UChar *dest, int32_t destCapacity,
                 const UChar *src, int32_t srcLength,
                 Edits *edits,
-                UErrorCode *pErrorCode) {
+                UErrorCode &errorCode) {
     int32_t locCache = UCASE_LOC_GREEK;
     int32_t destIndex=0;
     uint32_t state = 0;
@@ -1310,7 +1316,7 @@ int32_t toUpper(const UCaseMap *csm,
                     --numYpogegrammeni;
                 }
                 if(destIndex<0) {
-                    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+                    errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                     return 0;
                 }
             }
@@ -1320,7 +1326,7 @@ int32_t toUpper(const UCaseMap *csm,
             destIndex = appendResult(dest, destIndex, destCapacity, c, s,
                                      nextIndex - i, edits);
             if (destIndex < 0) {
-                *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+                errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
                 return 0;
             }
         }
@@ -1328,10 +1334,7 @@ int32_t toUpper(const UCaseMap *csm,
         state = nextState;
     }
 
-    if(destIndex>destCapacity) {
-        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
-    }
-    return destIndex;
+    return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
 }
 
 }  // namespace GreekUpper
@@ -1340,47 +1343,49 @@ U_NAMESPACE_END
 /* functions available in the common library (for unistr_case.cpp) */
 
 U_CFUNC int32_t U_CALLCONV
-ustrcase_internalToLower(const UCaseMap *csm,
+ustrcase_internalToLower(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_UNUSED
                          UChar *dest, int32_t destCapacity,
                          const UChar *src, int32_t srcLength,
                          icu::Edits *edits,
-                         UErrorCode *pErrorCode) {
+                         UErrorCode &errorCode) {
     UCaseContext csc=UCASECONTEXT_INITIALIZER;
     csc.p=(void *)src;
     csc.limit=srcLength;
-    return _caseMap(
+    int32_t destIndex = _caseMap(
         csm, ucase_toFullLower,
         dest, destCapacity,
         src, &csc, 0, srcLength,
-        edits, pErrorCode);
+        edits, errorCode);
+    return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
 }
 
 U_CFUNC int32_t U_CALLCONV
-ustrcase_internalToUpper(const UCaseMap *csm,
+ustrcase_internalToUpper(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_UNUSED
                          UChar *dest, int32_t destCapacity,
                          const UChar *src, int32_t srcLength,
                          icu::Edits *edits,
-                         UErrorCode *pErrorCode) {
+                         UErrorCode &errorCode) {
     int32_t locCache = csm->locCache;
     if (ucase_getCaseLocale(csm->locale, &locCache) == UCASE_LOC_GREEK) {
-        return GreekUpper::toUpper(csm, dest, destCapacity, src, srcLength, edits, pErrorCode);
+        return GreekUpper::toUpper(csm, dest, destCapacity, src, srcLength, edits, errorCode);
     }
     UCaseContext csc=UCASECONTEXT_INITIALIZER;
     csc.p=(void *)src;
     csc.limit=srcLength;
-    return _caseMap(
+    int32_t destIndex = _caseMap(
         csm, ucase_toFullUpper,
         dest, destCapacity,
         src, &csc, 0, srcLength,
-        edits, pErrorCode);
+        edits, errorCode);
+    return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
 }
 
 U_CFUNC int32_t U_CALLCONV
-ustrcase_internalFold(const UCaseMap *csm,
+ustrcase_internalFold(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_UNUSED
                       UChar *dest, int32_t destCapacity,
                       const UChar *src, int32_t srcLength,
                       icu::Edits *edits,
-                      UErrorCode *pErrorCode) {
+                      UErrorCode &errorCode) {
     /* case mapping loop */
     int32_t srcIndex = 0;
     int32_t destIndex = 0;
@@ -1393,29 +1398,25 @@ ustrcase_internalFold(const UCaseMap *csm,
         destIndex = appendResult(dest, destIndex, destCapacity, c, s,
                                  srcIndex - cpStart, edits);
         if (destIndex < 0) {
-            *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
             return 0;
         }
     }
 
-    // TODO: are these internal functions called where destIndex>destCapacity is not already checked? (see u_terminateUChars())
-    if (destIndex > destCapacity) {
-        *pErrorCode = U_BUFFER_OVERFLOW_ERROR;
-    }
-    return destIndex;
+    return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
 }
 
 U_CFUNC int32_t
-ustrcase_map(const UCaseMap *csm,
+ustrcase_map(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
              UChar *dest, int32_t destCapacity,
              const UChar *src, int32_t srcLength,
              UStringCaseMapper *stringCaseMapper,
              icu::Edits *edits,
-             UErrorCode *pErrorCode) {
+             UErrorCode &errorCode) {
     int32_t destLength;
 
     /* check argument values */
-    if(U_FAILURE(*pErrorCode)) {
+    if(U_FAILURE(errorCode)) {
         return 0;
     }
     if( destCapacity<0 ||
@@ -1423,7 +1424,7 @@ ustrcase_map(const UCaseMap *csm,
         src==NULL ||
         srcLength<-1
     ) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
         return 0;
     }
 
@@ -1437,27 +1438,28 @@ ustrcase_map(const UCaseMap *csm,
         ((src>=dest && src<(dest+destCapacity)) ||
          (dest>=src && dest<(src+srcLength)))
     ) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
         return 0;
     }
 
-    destLength=stringCaseMapper(csm, dest, destCapacity, src, srcLength, edits, pErrorCode);
-    return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
+    destLength=stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR
+                                dest, destCapacity, src, srcLength, edits, errorCode);
+    return u_terminateUChars(dest, destCapacity, destLength, &errorCode);
 }
 
 U_CFUNC int32_t
-ustrcase_mapWithOverlap(const UCaseMap *csm,
+ustrcase_mapWithOverlap(const UCaseMap *csm, UCASEMAP_BREAK_ITERATOR_PARAM
                         UChar *dest, int32_t destCapacity,
                         const UChar *src, int32_t srcLength,
                         UStringCaseMapper *stringCaseMapper,
-                        UErrorCode *pErrorCode) {
+                        UErrorCode &errorCode) {
     UChar buffer[300];
     UChar *temp;
 
     int32_t destLength;
 
     /* check argument values */
-    if(U_FAILURE(*pErrorCode)) {
+    if(U_FAILURE(errorCode)) {
         return 0;
     }
     if( destCapacity<0 ||
@@ -1465,7 +1467,7 @@ ustrcase_mapWithOverlap(const UCaseMap *csm,
         src==NULL ||
         srcLength<-1
     ) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
         return 0;
     }
 
@@ -1487,7 +1489,7 @@ ustrcase_mapWithOverlap(const UCaseMap *csm,
             /* allocate a buffer */
             temp=(UChar *)uprv_malloc(destCapacity*U_SIZEOF_UCHAR);
             if(temp==NULL) {
-                *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
+                errorCode=U_MEMORY_ALLOCATION_ERROR;
                 return 0;
             }
         }
@@ -1495,10 +1497,11 @@ ustrcase_mapWithOverlap(const UCaseMap *csm,
         temp=dest;
     }
 
-    destLength=stringCaseMapper(csm, temp, destCapacity, src, srcLength, NULL, pErrorCode);
+    destLength=stringCaseMapper(csm, UCASEMAP_BREAK_ITERATOR
+                                temp, destCapacity, src, srcLength, NULL, errorCode);
     if(temp!=dest) {
         /* copy the result string to the destination buffer */
-        if (U_SUCCESS(*pErrorCode) && 0 < destLength && destLength <= destCapacity) {
+        if (U_SUCCESS(errorCode) && 0 < destLength && destLength <= destCapacity) {
             u_memmove(dest, temp, destLength);
         }
         if(temp!=buffer) {
@@ -1506,7 +1509,7 @@ ustrcase_mapWithOverlap(const UCaseMap *csm,
         }
     }
 
-    return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
+    return u_terminateUChars(dest, destCapacity, destLength, &errorCode);
 }
 
 /* public API functions */
@@ -1520,10 +1523,10 @@ u_strFoldCase(UChar *dest, int32_t destCapacity,
     csm.csp=ucase_getSingleton();
     csm.options=options;
     return ustrcase_mapWithOverlap(
-        &csm,
+        &csm, UCASEMAP_BREAK_ITERATOR_NULL
         dest, destCapacity,
         src, srcLength,
-        ustrcase_internalFold, pErrorCode);
+        ustrcase_internalFold, *pErrorCode);
 }
 
 U_CAPI int32_t U_EXPORT2
@@ -1531,12 +1534,12 @@ ucasemap_toLowerWithEdits(const UCaseMap *csm,
                           UChar *dest, int32_t destCapacity,
                           const UChar *src, int32_t srcLength,
                           icu::Edits *edits,
-                          UErrorCode *pErrorCode) {
+                          UErrorCode &errorCode) {
     return ustrcase_map(
-        csm,
+        csm, UCASEMAP_BREAK_ITERATOR_NULL
         dest, destCapacity,
         src, srcLength,
-        ustrcase_internalToLower, edits, pErrorCode);
+        ustrcase_internalToLower, edits, errorCode);
 }
 
 U_CAPI int32_t U_EXPORT2
@@ -1544,12 +1547,25 @@ ucasemap_toUpperWithEdits(const UCaseMap *csm,
                           UChar *dest, int32_t destCapacity,
                           const UChar *src, int32_t srcLength,
                           icu::Edits *edits,
-                          UErrorCode *pErrorCode) {
+                          UErrorCode &errorCode) {
+    return ustrcase_map(
+        csm, UCASEMAP_BREAK_ITERATOR_NULL
+        dest, destCapacity,
+        src, srcLength,
+        ustrcase_internalToUpper, edits, errorCode);
+}
+
+U_CAPI int32_t U_EXPORT2
+ucasemap_foldCaseWithEdits(const UCaseMap *csm,
+                           UChar *dest, int32_t destCapacity,
+                           const UChar *src, int32_t srcLength,
+                           icu::Edits *edits,
+                           UErrorCode &errorCode) {
     return ustrcase_map(
-        csm,
+        csm, UCASEMAP_BREAK_ITERATOR_NULL
         dest, destCapacity,
         src, srcLength,
-        ustrcase_internalToUpper, edits, pErrorCode);
+        ustrcase_internalFold, edits, errorCode);
 }
 
 /* case-insensitive string comparisons -------------------------------------- */
index 20749e1d66ed896411bc5d486ad292e0662990a1..5e6e1418247a93b69db6f2d59fcc20ae64a430bb 100644 (file)
@@ -91,10 +91,10 @@ u_strToLower(UChar *dest, int32_t destCapacity,
     UCaseMap csm=UCASEMAP_INITIALIZER;
     setTempCaseMap(&csm, locale);
     return ustrcase_mapWithOverlap(
-        &csm,
+        &csm, UCASEMAP_BREAK_ITERATOR_NULL
         dest, destCapacity,
         src, srcLength,
-        ustrcase_internalToLower, pErrorCode);
+        ustrcase_internalToLower, *pErrorCode);
 }
 
 U_CAPI int32_t U_EXPORT2
@@ -105,8 +105,8 @@ u_strToUpper(UChar *dest, int32_t destCapacity,
     UCaseMap csm=UCASEMAP_INITIALIZER;
     setTempCaseMap(&csm, locale);
     return ustrcase_mapWithOverlap(
-        &csm,
+        &csm, UCASEMAP_BREAK_ITERATOR_NULL
         dest, destCapacity,
         src, srcLength,
-        ustrcase_internalToUpper, pErrorCode);
+        ustrcase_internalToUpper, *pErrorCode);
 }