ICU-12988 CaseMap UTF-8 with Edits; ported from UTF-16 changes in ICU-12410 r39684

author Markus Scherer <markus.icu@gmail.com>

Tue, 14 Mar 2017 23:55:29 +0000 (23:55 +0000)

committer Markus Scherer <markus.icu@gmail.com>

Tue, 14 Mar 2017 23:55:29 +0000 (23:55 +0000)
author Markus Scherer <markus.icu@gmail.com>
Tue, 14 Mar 2017 23:55:29 +0000 (23:55 +0000)
committer Markus Scherer <markus.icu@gmail.com>
Tue, 14 Mar 2017 23:55:29 +0000 (23:55 +0000)
diff --git a/icu4c/source/common/ucasemap.cpp b/icu4c/source/common/ucasemap.cpp

index 3a1423685fc6023f9f3cd12d6c33dcdbfed8920a..391140d6c5e2b9d9703d4e023871429992314204 100644 (file)
--- a/icu4c/source/common/ucasemap.cpp
+++ b/icu4c/source/common/ucasemap.cpp
@@ -20,6 +20,8 @@
  
  #include "unicode/utypes.h"
  #include "unicode/brkiter.h"
+#include "unicode/casemap.h"
+#include "unicode/edits.h"
  #include "unicode/ubrk.h"
  #include "unicode/uloc.h"
  #include "unicode/ustring.h"
@@ -32,10 +34,32 @@
  #include "unicode/utf16.h"
  #include "cmemory.h"
  #include "cstring.h"
+#include "uassert.h"
  #include "ucase.h"
  #include "ucasemap_imp.h"
  #include "ustr_imp.h"
  
+U_NAMESPACE_BEGIN
+
+namespace {
+
+// TODO: Share this helper with the UTF-16 code? Or make it inline in ucasemap_imp.h?
+int32_t checkOverflowAndEditsError(int32_t destIndex, int32_t destCapacity,
+                                   Edits *edits, UErrorCode &errorCode) {
+    if (U_SUCCESS(errorCode)) {
+        if (destIndex > destCapacity) {
+            errorCode = U_BUFFER_OVERFLOW_ERROR;
+        } else if (edits != NULL) {
+            edits->copyErrorTo(errorCode);
+        }
+    }
+    return destIndex;
+}
+
+}  // namespace
+
+U_NAMESPACE_END
+
  U_NAMESPACE_USE
  
  /* UCaseMap service object -------------------------------------------------- */
@@ -124,12 +148,13 @@ ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode) {
  
  /* UTF-8 string case mappings ----------------------------------------------- */
  
-/* TODO(markus): Move to a new, separate utf8case.c file. */
+/* TODO(markus): Move to a new, separate utf8case.cpp file. */
  
  /* append a full case mapping result, see UCASE_MAX_STRING_LENGTH */
  static inline int32_t
  appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
-             int32_t result, const UChar *s) {
+             int32_t result, const UChar *s,
+             int32_t cpLength, uint32_t options, icu::Edits *edits) {
      UChar32 c;
      int32_t length;
      UErrorCode errorCode;
@@ -137,86 +162,126 @@ appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
      /* decode the result */
      if(result<0) {
          /* (not) original code point */
+        if(edits!=NULL) {
+            edits->addUnchanged(cpLength);
+            if(options & UCASEMAP_OMIT_UNCHANGED_TEXT) {
+                return destIndex;
+            }
+        }
          c=~result;
-        length=U8_LENGTH(c);
-    } else if(result<=UCASE_MAX_STRING_LENGTH) {
-        c=U_SENTINEL;
-        length=result;
+        if(destIndex<destCapacity && c<=0x7f) {  // ASCII slightly-fastpath
+            dest[destIndex++]=(uint8_t)c;
+            return destIndex;
+        }
+        length=cpLength;
      } else {
-        c=result;
-        length=U8_LENGTH(c);
+        if(result<=UCASE_MAX_STRING_LENGTH) {
+            // string: "result" is the UTF-16 length
+            errorCode=U_ZERO_ERROR;
+            if(destIndex<destCapacity) {
+                u_strToUTF8((char *)(dest+destIndex), destCapacity-destIndex, &length,
+                            s, result, &errorCode);
+            } else {
+                u_strToUTF8(NULL, 0, &length, s, result, &errorCode);
+            }
+            if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) {
+                return -1;
+            }
+            if(length>(INT32_MAX-destIndex)) {
+                return -1;  // integer overflow
+            }
+            if(edits!=NULL) {
+                edits->addReplace(cpLength, length);
+            }
+            // We might have an overflow, but we know the actual length.
+            return destIndex+length;
+        } else if(destIndex<destCapacity && result<=0x7f) {  // ASCII slightly-fastpath
+            dest[destIndex++]=(uint8_t)result;
+            if(edits!=NULL) {
+                edits->addReplace(cpLength, 1);
+            }
+            return destIndex;
+        } else {
+            c=result;
+            length=U8_LENGTH(c);
+            if(edits!=NULL) {
+                edits->addReplace(cpLength, length);
+            }
+        }
      }
+    // From here on, c>=0 is a single code point and length is the number of bytes to append for it.
      if(length>(INT32_MAX-destIndex)) {
          return -1;  // integer overflow
      }
  
      if(destIndex<destCapacity) {
          /* append the result */
-        if(c>=0) {
-            /* code point */
-            UBool isError=FALSE;
-            U8_APPEND(dest, destIndex, destCapacity, c, isError);
-            if(isError) {
-                /* overflow, nothing written */
-                destIndex+=length;
-            }
-        } else {
-            /* string */
-            int32_t destLength;
-            errorCode=U_ZERO_ERROR;
-            u_strToUTF8(
-                (char *)(dest+destIndex), destCapacity-destIndex, &destLength,
-                s, length,
-                &errorCode);
-            if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) {
-                return -1;
-            }
-            if(destLength>(INT32_MAX-destIndex)) {
-                return -1;  // integer overflow
-            }
-            destIndex+=destLength;
-            /* we might have an overflow, but we know the actual length */
+        UBool isError=FALSE;
+        U8_APPEND(dest, destIndex, destCapacity, c, isError);
+        if(isError) {
+            /* overflow, nothing written */
+            destIndex+=length;
          }
      } else {
          /* preflight */
-        if(c>=0) {
-            destIndex+=length;
-        } else {
-            int32_t destLength;
-            errorCode=U_ZERO_ERROR;
-            u_strToUTF8(
-                NULL, 0, &destLength,
-                s, length,
-                &errorCode);
-            if(U_FAILURE(errorCode) && errorCode != U_BUFFER_OVERFLOW_ERROR) {
-                return -1;
-            }
-            if(destLength>(INT32_MAX-destIndex)) {
-                return -1;  // integer overflow
-            }
-            destIndex+=destLength;
-        }
+        destIndex+=length;
      }
      return destIndex;
  }
  
  static inline int32_t
-appendUChar(uint8_t *dest, int32_t destIndex, int32_t destCapacity, UChar c) {
-    int32_t length=U8_LENGTH(c);
-    if(length>(INT32_MAX-destIndex)) {
+appendASCII(uint8_t *dest, int32_t destIndex, int32_t destCapacity, uint8_t c) {
+    if(destIndex<destCapacity) {
+        dest[destIndex]=c;
+    } else if(destIndex==INT32_MAX) {
+        return -1;  // integer overflow
+    }
+    return destIndex+1;
+}
+
+// See unicode/utf8.h U8_APPEND_UNSAFE().
+static inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); }
+static inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); }
+
+static inline int32_t
+appendTwoBytes(uint8_t *dest, int32_t destIndex, int32_t destCapacity, UChar32 c) {
+    U_ASSERT(0x370 <= c && c <= 0x3ff);  // 2-byte UTF-8, main Greek block
+    if(2>(INT32_MAX-destIndex)) {
+        return -1;  // integer overflow
+    }
+    int32_t limit=destIndex+2;
+    if(limit<=destCapacity) {
+        dest+=destIndex;
+        dest[0]=getTwoByteLead(c);
+        dest[1]=getTwoByteTrail(c);
+    }
+    return limit;
+}
+
+static inline int32_t
+appendTwoBytes(uint8_t *dest, int32_t destIndex, int32_t destCapacity, const char *s) {
+    if(2>(INT32_MAX-destIndex)) {
          return -1;  // integer overflow
      }
-    int32_t limit=destIndex+length;
+    int32_t limit=destIndex+2;
      if(limit<=destCapacity) {
-        U8_APPEND_UNSAFE(dest, destIndex, c);
+        dest+=destIndex;
+        dest[0]=(uint8_t)s[0];
+        dest[1]=(uint8_t)s[1];
      }
      return limit;
  }
  
  static inline int32_t
-appendString(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
-             const uint8_t *s, int32_t length) {
+appendUnchanged(uint8_t *dest, int32_t destIndex, int32_t destCapacity,
+                const uint8_t *s, int32_t length, uint32_t options, icu::Edits *edits) {
      if(length>0) {
+        if(edits!=NULL) {
+            edits->addUnchanged(length);
+            if(options & UCASEMAP_OMIT_UNCHANGED_TEXT) {
+                return destIndex;
+            }
+        }
          if(length>(INT32_MAX-destIndex)) {
              return -1;  // integer overflow
          }
@@ -265,47 +330,41 @@ utf8_caseContextIterator(void *context, int8_t dir) {
   * context [0..srcLength[ into account.
   */
  static int32_t
-_caseMap(int32_t caseLocale, uint32_t /* TODO: options */, UCaseMapFull *map,
+_caseMap(int32_t caseLocale, uint32_t options, UCaseMapFull *map,
           uint8_t *dest, int32_t destCapacity,
           const uint8_t *src, UCaseContext *csc,
           int32_t srcStart, int32_t srcLimit,
-         UErrorCode *pErrorCode) {
-    const UChar *s = NULL;
-    UChar32 c, c2 = 0;
-    int32_t srcIndex, destIndex;
-
+         icu::Edits *edits,
+         UErrorCode &errorCode) {
      /* case mapping loop */
-    srcIndex=srcStart;
-    destIndex=0;
+    int32_t srcIndex=srcStart;
+    int32_t destIndex=0;
      while(srcIndex<srcLimit) {
-        csc->cpStart=srcIndex;
+        int32_t cpStart;
+        csc->cpStart=cpStart=srcIndex;
+        UChar32 c;
          U8_NEXT(src, srcIndex, srcLimit, c);
          csc->cpLimit=srcIndex;
          if(c<0) {
              // Malformed UTF-8.
-            destIndex=appendString(dest, destIndex, destCapacity, src+csc->cpStart, srcIndex-csc->cpStart);
+            destIndex=appendUnchanged(dest, destIndex, destCapacity,
+                                      src+cpStart, srcIndex-cpStart, options, edits);
              if(destIndex<0) {
-                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+                errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                  return 0;
              }
              continue;
          }
+        const UChar *s;
          c=map(c, utf8_caseContextIterator, csc, &s, caseLocale);
-        if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
-            /* fast path version of appendResult() for ASCII results */
-            dest[destIndex++]=(uint8_t)c2;
-        } else {
-            destIndex=appendResult(dest, destIndex, destCapacity, c, s);
-            if(destIndex<0) {
-                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-                return 0;
-            }
+        destIndex = appendResult(dest, destIndex, destCapacity, c, s,
+                                 srcIndex - cpStart, options, edits);
+        if (destIndex < 0) {
+            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
          }
      }
  
-    if(destIndex>destCapacity) {
-        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
-    }
      return destIndex;
  }
  
@@ -316,13 +375,9 @@ ucasemap_internalUTF8ToTitle(
          int32_t caseLocale, uint32_t options, BreakIterator *iter,
          uint8_t *dest, int32_t destCapacity,
          const uint8_t *src, int32_t srcLength,
-        UErrorCode *pErrorCode) {
-    const UChar *s;
-    UChar32 c;
-    int32_t prev, titleStart, titleLimit, idx, destIndex;
-    UBool isFirstIndex;
-
-    if(U_FAILURE(*pErrorCode)) {
+        icu::Edits *edits,
+        UErrorCode &errorCode) {
+    if(U_FAILURE(errorCode)) {
          return 0;
      }
  
@@ -330,21 +385,22 @@ ucasemap_internalUTF8ToTitle(
      UCaseContext csc=UCASECONTEXT_INITIALIZER;
      csc.p=(void *)src;
      csc.limit=srcLength;
-    destIndex=0;
-    prev=0;
-    isFirstIndex=TRUE;
+    int32_t destIndex=0;
+    int32_t prev=0;
+    UBool isFirstIndex=TRUE;
  
      /* titlecasing loop */
      while(prev<srcLength) {
          /* find next index where to titlecase */
+        int32_t index;
          if(isFirstIndex) {
              isFirstIndex=FALSE;
-            idx=iter->first();
+            index=iter->first();
          } else {
-            idx=iter->next();
+            index=iter->next();
          }
-        if(idx==UBRK_DONE || idx>srcLength) {
-            idx=srcLength;
+        if(index==UBRK_DONE || index>srcLength) {
+            index=srcLength;
          }
  
          /*
@@ -360,29 +416,32 @@ ucasemap_internalUTF8ToTitle(
           * b) first case letter (titlecase)         [titleStart..titleLimit[
           * c) subsequent characters (lowercase)                 [titleLimit..index[
           */
-        if(prev<idx) {
+        if(prev<index) {
              /* find and copy uncased characters [prev..titleStart[ */
-            titleStart=titleLimit=prev;
-            U8_NEXT(src, titleLimit, idx, c);
+            int32_t titleStart=prev;
+            int32_t titleLimit=prev;
+            UChar32 c;
+            U8_NEXT(src, titleLimit, index, c);
              if((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(c)) {
                  /* Adjust the titlecasing index (titleStart) to the next cased character. */
                  for(;;) {
                      titleStart=titleLimit;
-                    if(titleLimit==idx) {
+                    if(titleLimit==index) {
                          /*
                           * only uncased characters in [prev..index[
                           * stop with titleStart==titleLimit==index
                           */
                          break;
                      }
-                    U8_NEXT(src, titleLimit, idx, c);
+                    U8_NEXT(src, titleLimit, index, c);
                      if(UCASE_NONE!=ucase_getType(c)) {
                          break; /* cased letter at [titleStart..titleLimit[ */
                      }
                  }
-                destIndex=appendString(dest, destIndex, destCapacity, src+prev, titleStart-prev);
+                destIndex=appendUnchanged(dest, destIndex, destCapacity,
+                                          src+prev, titleStart-prev, options, edits);
                  if(destIndex<0) {
-                    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+                    errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                      return 0;
                  }
              }
@@ -392,27 +451,48 @@ ucasemap_internalUTF8ToTitle(
                  if(c>=0) {
                      csc.cpStart=titleStart;
                      csc.cpLimit=titleLimit;
+                    const UChar *s;
                      c=ucase_toFullTitle(c, utf8_caseContextIterator, &csc, &s, caseLocale);
-                    destIndex=appendResult(dest, destIndex, destCapacity, c, s);
+                    destIndex=appendResult(dest, destIndex, destCapacity, c, s,
+                                           titleLimit-titleStart, options, edits);
                  } else {
                      // Malformed UTF-8.
-                    destIndex=appendString(dest, destIndex, destCapacity, src+titleStart, titleLimit-titleStart);
+                    destIndex=appendUnchanged(dest, destIndex, destCapacity,
+                                              src+titleStart, titleLimit-titleStart, options, edits);
                  }
                  if(destIndex<0) {
-                    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+                    errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                      return 0;
                  }
  
                  /* Special case Dutch IJ titlecasing */
-                if (titleStart+1 < idx &&
+                if (titleStart+1 < index &&
                          caseLocale == UCASE_LOC_DUTCH &&
-                        (src[titleStart] == 0x0049 || src[titleStart] == 0x0069) &&
-                        (src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) {
-                    destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A);
-                    titleLimit++;
+                        (src[titleStart] == 0x0049 || src[titleStart] == 0x0069)) {
+                    if (src[titleStart+1] == 0x006A) {
+                        destIndex=appendASCII(dest, destIndex, destCapacity, 0x004A);
+                        if(destIndex<0) {
+                            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+                            return 0;
+                        }
+                        if(edits!=NULL) {
+                            edits->addReplace(1, 1);
+                        }
+                        titleLimit++;
+                    } else if (src[titleStart+1] == 0x004A) {
+                        // Keep the capital J from getting lowercased.
+                        destIndex=appendUnchanged(dest, destIndex, destCapacity,
+                                                  src+titleStart+1, 1, options, edits);
+                        if(destIndex<0) {
+                            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+                            return 0;
+                        }
+                        titleLimit++;
+                    }
                  }
+
                  /* lowercase [titleLimit..index[ */
-                if(titleLimit<idx) {
+                if(titleLimit<index) {
                      if((options&U_TITLECASE_NO_LOWERCASE)==0) {
                          /* Normal operation: Lowercase the rest of the word. */
                          destIndex+=
@@ -420,19 +500,20 @@ ucasemap_internalUTF8ToTitle(
                                  caseLocale, options, ucase_toFullLower,
                                  dest+destIndex, destCapacity-destIndex,
                                  src, &csc,
-                                titleLimit, idx,
-                                pErrorCode);
-                        if(*pErrorCode==U_BUFFER_OVERFLOW_ERROR) {
-                            *pErrorCode=U_ZERO_ERROR;
+                                titleLimit, index,
+                                edits, errorCode);
+                        if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
+                            errorCode=U_ZERO_ERROR;
                          }
-                        if(U_FAILURE(*pErrorCode)) {
+                        if(U_FAILURE(errorCode)) {
                              return destIndex;
                          }
                      } else {
                          /* Optionally just copy the rest of the word unchanged. */
-                        destIndex=appendString(dest, destIndex, destCapacity, src+titleLimit, idx-titleLimit);
+                        destIndex=appendUnchanged(dest, destIndex, destCapacity,
+                                                  src+titleLimit, index-titleLimit, options, edits);
                          if(destIndex<0) {
-                            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+                            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                              return 0;
                          }
                      }
@@ -440,13 +521,10 @@ ucasemap_internalUTF8ToTitle(
              }
          }
  
-        prev=idx;
+        prev=index;
      }
  
-    if(destIndex>destCapacity) {
-        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
-    }
-    return destIndex;
+    return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
  }
  
  #endif
@@ -471,10 +549,11 @@ UBool isFollowedByCasedLetter(const uint8_t *s, int32_t i, int32_t length) {
  }
  
  // Keep this consistent with the UTF-16 version in ustrcase.cpp and the Java version in CaseMap.java.
-int32_t toUpper(int32_t caseLocale, uint32_t /* TODO: options */,
+int32_t toUpper(uint32_t options,
                  uint8_t *dest, int32_t destCapacity,
                  const uint8_t *src, int32_t srcLength,
-                UErrorCode *pErrorCode) {
+                Edits *edits,
+                UErrorCode &errorCode) {
      int32_t destIndex=0;
      uint32_t state = 0;
      for (int32_t i = 0; i < srcLength;) {
@@ -550,40 +629,75 @@ int32_t toUpper(int32_t caseLocale, uint32_t /* TODO: options */,
                      data &= ~HAS_EITHER_DIALYTIKA;
                  }
              }
-            destIndex=appendUChar(dest, destIndex, destCapacity, (UChar)upper);
-            if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) {
-                destIndex=appendUChar(dest, destIndex, destCapacity, 0x308);  // restore or add a dialytika
-            }
-            if (destIndex >= 0 && addTonos) {
-                destIndex=appendUChar(dest, destIndex, destCapacity, 0x301);
-            }
-            while (destIndex >= 0 && numYpogegrammeni > 0) {
-                destIndex=appendUChar(dest, destIndex, destCapacity, 0x399);
-                --numYpogegrammeni;
-            }
-            if(destIndex<0) {
-                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-                return 0;
+
+            UBool change = TRUE;
+            if (edits != NULL) {
+                // Find out first whether we are changing the text.
+                U_ASSERT(0x370 <= upper && upper <= 0x3ff);  // 2-byte UTF-8, main Greek block
+                change = (i + 2) > nextIndex ||
+                        src[i] != getTwoByteLead(upper) || src[i + 1] != getTwoByteTrail(upper) ||
+                        numYpogegrammeni > 0;
+                int32_t i2 = i + 2;
+                if ((data & HAS_EITHER_DIALYTIKA) != 0) {
+                    change |= (i2 + 2) > nextIndex ||
+                            src[i2] != (uint8_t)u8"\u0308"[0] ||
+                            src[i2 + 1] != (uint8_t)u8"\u0308"[1];
+                    i2 += 2;
+                }
+                if (addTonos) {
+                    change |= (i2 + 2) > nextIndex ||
+                            src[i2] != (uint8_t)u8"\u0301"[0] ||
+                            src[i2 + 1] != (uint8_t)u8"\u0301"[1];
+                    i2 += 2;
+                }
+                int32_t oldLength = nextIndex - i;
+                int32_t newLength = (i2 - i) + numYpogegrammeni * 2;  // 2 bytes per U+0399
+                change |= oldLength != newLength;
+                if (change) {
+                    if (edits != NULL) {
+                        edits->addReplace(oldLength, newLength);
+                    }
+                } else {
+                    if (edits != NULL) {
+                        edits->addUnchanged(oldLength);
+                    }
+                    // Still write the unchanged text unless the caller asked to omit it.
+                    change = (options & UCASEMAP_OMIT_UNCHANGED_TEXT) == 0;
+                }
              }
-        } else if(c>=0) {
-            const UChar *s;
-            UChar32 c2 = 0;
-            c=ucase_toFullUpper(c, NULL, NULL, &s, caseLocale);
-            if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
-                /* fast path version of appendResult() for ASCII results */
-                dest[destIndex++]=(uint8_t)c2;
-            } else {
-                destIndex=appendResult(dest, destIndex, destCapacity, c, s);
+
+            if (change) {
+                destIndex=appendTwoBytes(dest, destIndex, destCapacity, upper);
+                if (destIndex >= 0 && (data & HAS_EITHER_DIALYTIKA) != 0) {
+                    destIndex=appendTwoBytes(dest, destIndex, destCapacity, u8"\u0308");  // restore or add a dialytika
+                }
+                if (destIndex >= 0 && addTonos) {
+                    destIndex=appendTwoBytes(dest, destIndex, destCapacity, u8"\u0301");
+                }
+                while (destIndex >= 0 && numYpogegrammeni > 0) {
+                    destIndex=appendTwoBytes(dest, destIndex, destCapacity, u8"\u0399");
+                    --numYpogegrammeni;
+                }
                  if(destIndex<0) {
-                    *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+                    errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                      return 0;
                  }
              }
+        } else if(c>=0) {
+            const UChar *s;
+            c=ucase_toFullUpper(c, NULL, NULL, &s, UCASE_LOC_GREEK);
+            destIndex = appendResult(dest, destIndex, destCapacity, c, s,
+                                     nextIndex - i, options, edits);
+            if (destIndex < 0) {
+                errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+                return 0;
+            }
          } else {
              // Malformed UTF-8.
-            destIndex=appendString(dest, destIndex, destCapacity, src+i, nextIndex-i);
+            destIndex=appendUnchanged(dest, destIndex, destCapacity,
+                                      src+i, nextIndex-i, options, edits);
              if(destIndex<0) {
-                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+                errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                  return 0;
              }
          }
@@ -591,9 +705,6 @@ int32_t toUpper(int32_t caseLocale, uint32_t /* TODO: options */,
          state = nextState;
      }
  
-    if(destIndex>destCapacity) {
-        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
-    }
      return destIndex;
  }
  
@@ -604,77 +715,76 @@ static int32_t U_CALLCONV
  ucasemap_internalUTF8ToLower(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
                               uint8_t *dest, int32_t destCapacity,
                               const uint8_t *src, int32_t srcLength,
-                             UErrorCode *pErrorCode) {
+                             icu::Edits *edits,
+                             UErrorCode &errorCode) {
      UCaseContext csc=UCASECONTEXT_INITIALIZER;
      csc.p=(void *)src;
      csc.limit=srcLength;
-    return _caseMap(
+    int32_t destIndex = _caseMap(
          caseLocale, options, ucase_toFullLower,
          dest, destCapacity,
          src, &csc, 0, srcLength,
-        pErrorCode);
+        edits, errorCode);
+    return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
  }
  
  static int32_t U_CALLCONV
  ucasemap_internalUTF8ToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
                               uint8_t *dest, int32_t destCapacity,
                               const uint8_t *src, int32_t srcLength,
-                             UErrorCode *pErrorCode) {
+                             icu::Edits *edits,
+                             UErrorCode &errorCode) {
+    int32_t destIndex;
      if (caseLocale == UCASE_LOC_GREEK) {
-        return GreekUpper::toUpper(caseLocale, options, dest, destCapacity, src, srcLength, pErrorCode);
+        destIndex = GreekUpper::toUpper(options, dest, destCapacity,
+                                        src, srcLength, edits, errorCode);
+    } else {
+        UCaseContext csc=UCASECONTEXT_INITIALIZER;
+        csc.p=(void *)src;
+        csc.limit=srcLength;
+        destIndex = _caseMap(
+            caseLocale, options, ucase_toFullUpper,
+            dest, destCapacity,
+            src, &csc, 0, srcLength,
+            edits, errorCode);
      }
-    UCaseContext csc=UCASECONTEXT_INITIALIZER;
-    csc.p=(void *)src;
-    csc.limit=srcLength;
-    return _caseMap(
-        caseLocale, options, ucase_toFullUpper,
-        dest, destCapacity,
-        src, &csc, 0, srcLength,
-        pErrorCode);
+    return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
  }
  
  static int32_t U_CALLCONV
  ucasemap_internalUTF8Fold(int32_t /* caseLocale */, uint32_t options, UCASEMAP_BREAK_ITERATOR_UNUSED
                            uint8_t *dest, int32_t destCapacity,
                            const uint8_t *src, int32_t srcLength,
-                          UErrorCode *pErrorCode) {
-    int32_t srcIndex, destIndex;
-
-    const UChar *s;
-    UChar32 c, c2;
-    int32_t start;
-
+                          icu::Edits *edits,
+                          UErrorCode &errorCode) {
      /* case mapping loop */
-    srcIndex=destIndex=0;
-    while(srcIndex<srcLength) {
-        start=srcIndex;
+    int32_t srcIndex = 0;
+    int32_t destIndex = 0;
+    while (srcIndex < srcLength) {
+        int32_t cpStart = srcIndex;
+        UChar32 c;
          U8_NEXT(src, srcIndex, srcLength, c);
          if(c<0) {
              // Malformed UTF-8.
-            destIndex=appendString(dest, destIndex, destCapacity, src+start, srcIndex-start);
+            destIndex=appendUnchanged(dest, destIndex, destCapacity,
+                                      src+cpStart, srcIndex-cpStart, options, edits);
              if(destIndex<0) {
-                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
+                errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
                  return 0;
              }
              continue;
          }
-        c=ucase_toFullFolding(c, &s, options);
-        if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0x7f : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0x7f)) {
-            /* fast path version of appendResult() for ASCII results */
-            dest[destIndex++]=(uint8_t)c2;
-        } else {
-            destIndex=appendResult(dest, destIndex, destCapacity, c, s);
-            if(destIndex<0) {
-                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
-                return 0;
-            }
+        const UChar *s;
+        c = ucase_toFullFolding(c, &s, options);
+        destIndex = appendResult(dest, destIndex, destCapacity, c, s,
+                                 srcIndex - cpStart, options, edits);
+        if (destIndex < 0) {
+            errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
+            return 0;
          }
      }
  
-    if(destIndex>destCapacity) {
-        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
-    }
-    return destIndex;
+    return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
  }
  
  U_CFUNC int32_t
@@ -682,11 +792,12 @@ ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_P
                   uint8_t *dest, int32_t destCapacity,
                   const uint8_t *src, int32_t srcLength,
                   UTF8CaseMapper *stringCaseMapper,
-                 UErrorCode *pErrorCode) {
+                 icu::Edits *edits,
+                 UErrorCode &errorCode) {
      int32_t destLength;
  
      /* check argument values */
-    if(U_FAILURE(*pErrorCode)) {
+    if(U_FAILURE(errorCode)) {
          return 0;
      }
      if( destCapacity<0 ||
@@ -694,7 +805,7 @@ ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_P
          src==NULL ||
          srcLength<-1
      ) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
          return 0;
      }
  
@@ -708,13 +819,16 @@ ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_P
          ((src>=dest && src<(dest+destCapacity)) ||
           (dest>=src && dest<(src+srcLength)))
      ) {
-        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+        errorCode=U_ILLEGAL_ARGUMENT_ERROR;
          return 0;
      }
  
+    if(edits!=NULL) {
+        edits->reset();
+    }
      destLength=stringCaseMapper(caseLocale, options, UCASEMAP_BREAK_ITERATOR
-                                dest, destCapacity, src, srcLength, pErrorCode);
-    return u_terminateChars((char *)dest, destCapacity, destLength, pErrorCode);
+                                dest, destCapacity, src, srcLength, edits, errorCode);
+    return u_terminateChars((char *)dest, destCapacity, destLength, &errorCode);
  }
  
  /* public API functions */
@@ -728,7 +842,7 @@ ucasemap_utf8ToLower(const UCaseMap *csm,
          csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
          (uint8_t *)dest, destCapacity,
          (const uint8_t *)src, srcLength,
-        ucasemap_internalUTF8ToLower, pErrorCode);
+        ucasemap_internalUTF8ToLower, NULL, *pErrorCode);
  }
  
  U_CAPI int32_t U_EXPORT2
@@ -740,7 +854,7 @@ ucasemap_utf8ToUpper(const UCaseMap *csm,
          csm->caseLocale, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
          (uint8_t *)dest, destCapacity,
          (const uint8_t *)src, srcLength,
-        ucasemap_internalUTF8ToUpper, pErrorCode);
+        ucasemap_internalUTF8ToUpper, NULL, *pErrorCode);
  }
  
  U_CAPI int32_t U_EXPORT2
@@ -752,5 +866,45 @@ ucasemap_utf8FoldCase(const UCaseMap *csm,
          UCASE_LOC_ROOT, csm->options, UCASEMAP_BREAK_ITERATOR_NULL
          (uint8_t *)dest, destCapacity,
          (const uint8_t *)src, srcLength,
-        ucasemap_internalUTF8Fold, pErrorCode);
+        ucasemap_internalUTF8Fold, NULL, *pErrorCode);
  }
+
+U_NAMESPACE_BEGIN
+
+int32_t CaseMap::utf8ToLower(
+        const char *locale, uint32_t options,
+        const char *src, int32_t srcLength,
+        char *dest, int32_t destCapacity, Edits *edits,
+        UErrorCode &errorCode) {
+    return ucasemap_mapUTF8(
+        ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
+        (uint8_t *)dest, destCapacity,
+        (const uint8_t *)src, srcLength,
+        ucasemap_internalUTF8ToLower, edits, errorCode);
+}
+
+int32_t CaseMap::utf8ToUpper(
+        const char *locale, uint32_t options,
+        const char *src, int32_t srcLength,
+        char *dest, int32_t destCapacity, Edits *edits,
+        UErrorCode &errorCode) {
+    return ucasemap_mapUTF8(
+        ustrcase_getCaseLocale(locale), options, UCASEMAP_BREAK_ITERATOR_NULL
+        (uint8_t *)dest, destCapacity,
+        (const uint8_t *)src, srcLength,
+        ucasemap_internalUTF8ToUpper, edits, errorCode);
+}
+
+int32_t CaseMap::utf8Fold(
+        uint32_t options,
+        const char *src, int32_t srcLength,
+        char *dest, int32_t destCapacity, Edits *edits,
+        UErrorCode &errorCode) {
+    return ucasemap_mapUTF8(
+        UCASE_LOC_ROOT, options, UCASEMAP_BREAK_ITERATOR_NULL
+        (uint8_t *)dest, destCapacity,
+        (const uint8_t *)src, srcLength,
+        ucasemap_internalUTF8Fold, edits, errorCode);
+}
+
+U_NAMESPACE_END
diff --git a/icu4c/source/common/ucasemap_imp.h b/icu4c/source/common/ucasemap_imp.h

index e400f4af1d08f780a4089ff72a6d9dbf45564977..79204226b00900aed4ea5418d5fa2d5aae8a9c24 100644 (file)
--- a/icu4c/source/common/ucasemap_imp.h
+++ b/icu4c/source/common/ucasemap_imp.h
@@ -172,7 +172,8 @@ UTF8CaseMapper(int32_t caseLocale, uint32_t options,
  #endif
                 uint8_t *dest, int32_t destCapacity,
                 const uint8_t *src, int32_t srcLength,
-               UErrorCode *pErrorCode);
+               icu::Edits *edits,
+               UErrorCode &errorCode);
  
  #if !UCONFIG_NO_BREAK_ITERATION
  
@@ -182,7 +183,8 @@ ucasemap_internalUTF8ToTitle(int32_t caseLocale, uint32_t options,
          icu::BreakIterator *iter,
          uint8_t *dest, int32_t destCapacity,
          const uint8_t *src, int32_t srcLength,
-        UErrorCode *pErrorCode);
+        icu::Edits *edits,
+        UErrorCode &errorCode);
  
  #endif
  
@@ -195,7 +197,8 @@ ucasemap_mapUTF8(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_ITERATOR_P
                   uint8_t *dest, int32_t destCapacity,
                   const uint8_t *src, int32_t srcLength,
                   UTF8CaseMapper *stringCaseMapper,
-                 UErrorCode *pErrorCode);
+                 icu::Edits *edits,
+                 UErrorCode &errorCode);
  
  U_NAMESPACE_BEGIN
  namespace GreekUpper {
diff --git a/icu4c/source/common/ucasemap_titlecase_brkiter.cpp b/icu4c/source/common/ucasemap_titlecase_brkiter.cpp

index e956894cf83b67acb78c2079eb5f358a1c6d47d5..a253850fa290cf8f195e52449bd0ae1fcb4d1bb2 100644 (file)
--- a/icu4c/source/common/ucasemap_titlecase_brkiter.cpp
+++ b/icu4c/source/common/ucasemap_titlecase_brkiter.cpp
@@ -23,11 +23,45 @@
  
  #include "unicode/brkiter.h"
  #include "unicode/ubrk.h"
+#include "unicode/casemap.h"
  #include "unicode/ucasemap.h"
  #include "cmemory.h"
  #include "ucase.h"
  #include "ucasemap_imp.h"
  
+U_NAMESPACE_BEGIN
+
+int32_t CaseMap::utf8ToTitle(
+        const char *locale, uint32_t options, BreakIterator *iter,
+        const char *src, int32_t srcLength,
+        char *dest, int32_t destCapacity, Edits *edits,
+        UErrorCode &errorCode) {
+    if (U_FAILURE(errorCode)) {
+        return 0;
+    }
+    UText utext=UTEXT_INITIALIZER;
+    utext_openUTF8(&utext, src, srcLength, &errorCode);
+    LocalPointer<BreakIterator> ownedIter;
+    if(iter==NULL) {
+        iter=BreakIterator::createWordInstance(Locale(locale), errorCode);
+        ownedIter.adoptInstead(iter);
+    }
+    if(U_FAILURE(errorCode)) {
+        utext_close(&utext);
+        return 0;
+    }
+    iter->setText(&utext, errorCode);
+    int32_t length=ucasemap_mapUTF8(
+        ustrcase_getCaseLocale(locale), options, iter,
+        (uint8_t *)dest, destCapacity,
+        (const uint8_t *)src, srcLength,
+        ucasemap_internalUTF8ToTitle, edits, errorCode);
+    utext_close(&utext);
+    return length;
+}
+
+U_NAMESPACE_END
+
  U_NAMESPACE_USE
  
  U_CAPI const UBreakIterator * U_EXPORT2
@@ -65,7 +99,7 @@ ucasemap_utf8ToTitle(UCaseMap *csm,
              csm->caseLocale, csm->options, csm->iter,
              (uint8_t *)dest, destCapacity,
              (const uint8_t *)src, srcLength,
-            ucasemap_internalUTF8ToTitle, pErrorCode);
+            ucasemap_internalUTF8ToTitle, NULL, *pErrorCode);
      utext_close(&utext);
      return length;
  }
diff --git a/icu4c/source/common/unicode/casemap.h b/icu4c/source/common/unicode/casemap.h

index 2e685eef7aed1e54d45e371ad23717b5a1dc7a40..98184820d53457bf6f9f1dba4c41bffd39c35d0f 100644 (file)
--- a/icu4c/source/common/unicode/casemap.h
+++ b/icu4c/source/common/unicode/casemap.h
@@ -47,6 +47,7 @@ public:
       *                  without writing any of the result string.
       * @param edits     Records edits for index mapping, working with styled text,
       *                  and getting only changes (if any).
+     *                  The Edits contents is undefined if any error occurs.
       *                  This function calls edits->reset() first. edits can be NULL.
       * @param errorCode Reference to an in/out error code value
       *                  which must not indicate a failure before the function call.
@@ -81,6 +82,7 @@ public:
       *                  without writing any of the result string.
       * @param edits     Records edits for index mapping, working with styled text,
       *                  and getting only changes (if any).
+     *                  The Edits contents is undefined if any error occurs.
       *                  This function calls edits->reset() first. edits can be NULL.
       * @param errorCode Reference to an in/out error code value
       *                  which must not indicate a failure before the function call.
@@ -127,6 +129,7 @@ public:
       *                  without writing any of the result string.
       * @param edits     Records edits for index mapping, working with styled text,
       *                  and getting only changes (if any).
+     *                  The Edits contents is undefined if any error occurs.
       *                  This function calls edits->reset() first. edits can be NULL.
       * @param errorCode Reference to an in/out error code value
       *                  which must not indicate a failure before the function call.
@@ -168,6 +171,7 @@ public:
       *                  without writing any of the result string.
       * @param edits     Records edits for index mapping, working with styled text,
       *                  and getting only changes (if any).
+     *                  The Edits contents is undefined if any error occurs.
       *                  This function calls edits->reset() first. edits can be NULL.
       * @param errorCode Reference to an in/out error code value
       *                  which must not indicate a failure before the function call.
@@ -184,6 +188,164 @@ public:
              char16_t *dest, int32_t destCapacity, Edits *edits,
              UErrorCode &errorCode);
  
+    /**
+     * Lowercases a UTF-8 string and optionally records edits.
+     * Casing is locale-dependent and context-sensitive.
+     * The result may be longer or shorter than the original.
+     * The source string and the destination buffer must not overlap.
+     *
+     * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
+     * @param options   Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
+     * @param src       The original string.
+     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+     * @param dest      A buffer for the result string. The result will be NUL-terminated if
+     *                  the buffer is large enough.
+     *                  The contents is undefined in case of failure.
+     * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+     *                  dest may be NULL and the function will only return the length of the result
+     *                  without writing any of the result string.
+     * @param edits     Records edits for index mapping, working with styled text,
+     *                  and getting only changes (if any).
+     *                  The Edits contents is undefined if any error occurs.
+     *                  This function calls edits->reset() first. edits can be NULL.
+     * @param errorCode Reference to an in/out error code value
+     *                  which must not indicate a failure before the function call.
+     * @return The length of the result string, if successful.
+     *         When the result would be longer than destCapacity,
+     *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
+     *
+     * @see ucasemap_utf8ToLower
+     * @draft ICU 59
+     */
+    static int32_t utf8ToLower(
+            const char *locale, uint32_t options,
+            const char *src, int32_t srcLength,
+            char *dest, int32_t destCapacity, Edits *edits,
+            UErrorCode &errorCode);
+
+    /**
+     * Uppercases a UTF-8 string and optionally records edits.
+     * Casing is locale-dependent and context-sensitive.
+     * The result may be longer or shorter than the original.
+     * The source string and the destination buffer must not overlap.
+     *
+     * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
+     * @param options   Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
+     * @param src       The original string.
+     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+     * @param dest      A buffer for the result string. The result will be NUL-terminated if
+     *                  the buffer is large enough.
+     *                  The contents is undefined in case of failure.
+     * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+     *                  dest may be NULL and the function will only return the length of the result
+     *                  without writing any of the result string.
+     * @param edits     Records edits for index mapping, working with styled text,
+     *                  and getting only changes (if any).
+     *                  The Edits contents is undefined if any error occurs.
+     *                  This function calls edits->reset() first. edits can be NULL.
+     * @param errorCode Reference to an in/out error code value
+     *                  which must not indicate a failure before the function call.
+     * @return The length of the result string, if successful.
+     *         When the result would be longer than destCapacity,
+     *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
+     *
+     * @see ucasemap_utf8ToUpper
+     * @draft ICU 59
+     */
+    static int32_t utf8ToUpper(
+            const char *locale, uint32_t options,
+            const char *src, int32_t srcLength,
+            char *dest, int32_t destCapacity, Edits *edits,
+            UErrorCode &errorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+    /**
+     * Titlecases a UTF-8 string and optionally records edits.
+     * Casing is locale-dependent and context-sensitive.
+     * The result may be longer or shorter than the original.
+     * The source string and the destination buffer must not overlap.
+     *
+     * Titlecasing uses a break iterator to find the first characters of words
+     * that are to be titlecased. It titlecases those characters and lowercases
+     * all others. (This can be modified with options bits.)
+     *
+     * @param locale    The locale ID. ("" = root locale, NULL = default locale.)
+     * @param options   Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
+     *                  U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT.
+     * @param iter      A break iterator to find the first characters of words that are to be titlecased.
+     *                  It is set to the source string (setText())
+     *                  and used one or more times for iteration (first() and next()).
+     *                  If NULL, then a word break iterator for the locale is used
+     *                  (or something equivalent).
+     * @param src       The original string.
+     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+     * @param dest      A buffer for the result string. The result will be NUL-terminated if
+     *                  the buffer is large enough.
+     *                  The contents is undefined in case of failure.
+     * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+     *                  dest may be NULL and the function will only return the length of the result
+     *                  without writing any of the result string.
+     * @param edits     Records edits for index mapping, working with styled text,
+     *                  and getting only changes (if any).
+     *                  The Edits contents is undefined if any error occurs.
+     *                  This function calls edits->reset() first. edits can be NULL.
+     * @param errorCode Reference to an in/out error code value
+     *                  which must not indicate a failure before the function call.
+     * @return The length of the result string, if successful.
+     *         When the result would be longer than destCapacity,
+     *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
+     *
+     * @see ucasemap_utf8ToTitle
+     * @draft ICU 59
+     */
+    static int32_t utf8ToTitle(
+            const char *locale, uint32_t options, BreakIterator *iter,
+            const char *src, int32_t srcLength,
+            char *dest, int32_t destCapacity, Edits *edits,
+            UErrorCode &errorCode);
+
+#endif  // UCONFIG_NO_BREAK_ITERATION
+
+    /**
+     * Case-folds a UTF-8 string and optionally records edits.
+     *
+     * Case folding is locale-independent and not context-sensitive,
+     * but there is an option for whether to include or exclude mappings for dotted I
+     * and dotless i that are marked with 'T' in CaseFolding.txt.
+     *
+     * The result may be longer or shorter than the original.
+     * The source string and the destination buffer must not overlap.
+     *
+     * @param options   Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
+     *                  U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
+     * @param src       The original string.
+     * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+     * @param dest      A buffer for the result string. The result will be NUL-terminated if
+     *                  the buffer is large enough.
+     *                  The contents is undefined in case of failure.
+     * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+     *                  dest may be NULL and the function will only return the length of the result
+     *                  without writing any of the result string.
+     * @param edits     Records edits for index mapping, working with styled text,
+     *                  and getting only changes (if any).
+     *                  The Edits contents is undefined if any error occurs.
+     *                  This function calls edits->reset() first. edits can be NULL.
+     * @param errorCode Reference to an in/out error code value
+     *                  which must not indicate a failure before the function call.
+     * @return The length of the result string, if successful.
+     *         When the result would be longer than destCapacity,
+     *         the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
+     *
+     * @see ucasemap_utf8FoldCase
+     * @draft ICU 59
+     */
+    static int32_t utf8Fold(
+            uint32_t options,
+            const char *src, int32_t srcLength,
+            char *dest, int32_t destCapacity, Edits *edits,
+            UErrorCode &errorCode);
+
  private:
      CaseMap() = delete;
      CaseMap(const CaseMap &other) = delete;
diff --git a/icu4c/source/common/ustrcase.cpp b/icu4c/source/common/ustrcase.cpp

index 0e38a42e1032c78afc44a94e2ea57f68d1d3678c..b12e7a7c0b3a102404c0c1bf114ddf80100b7f08 100644 (file)
--- a/icu4c/source/common/ustrcase.cpp
+++ b/icu4c/source/common/ustrcase.cpp
@@ -1000,7 +1000,7 @@ int32_t toUpper(uint32_t options,
          state = nextState;
      }
  
-    return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
+    return destIndex;
  }
  
  }  // namespace GreekUpper
@@ -1031,17 +1031,20 @@ ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_IT
                           const UChar *src, int32_t srcLength,
                           icu::Edits *edits,
                           UErrorCode &errorCode) {
+    int32_t destIndex;
      if (caseLocale == UCASE_LOC_GREEK) {
-        return GreekUpper::toUpper(options, dest, destCapacity, src, srcLength, edits, errorCode);
+        destIndex = GreekUpper::toUpper(options, dest, destCapacity,
+                                        src, srcLength, edits, errorCode);
+    } else {
+        UCaseContext csc=UCASECONTEXT_INITIALIZER;
+        csc.p=(void *)src;
+        csc.limit=srcLength;
+        destIndex = _caseMap(
+            caseLocale, options, ucase_toFullUpper,
+            dest, destCapacity,
+            src, &csc, 0, srcLength,
+            edits, errorCode);
      }
-    UCaseContext csc=UCASECONTEXT_INITIALIZER;
-    csc.p=(void *)src;
-    csc.limit=srcLength;
-    int32_t destIndex = _caseMap(
-        caseLocale, options, ucase_toFullUpper,
-        dest, destCapacity,
-        src, &csc, 0, srcLength,
-        edits, errorCode);
      return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
  }
  
diff --git a/icu4c/source/test/intltest/strcase.cpp b/icu4c/source/test/intltest/strcase.cpp

index 99a11af5ac1aee3fb2a034bc5dc0d67e8e308e74..037a090ea3f8a5b98acfc7a3e2ba0f5093f62738 100644 (file)
--- a/icu4c/source/test/intltest/strcase.cpp
+++ b/icu4c/source/test/intltest/strcase.cpp
@@ -59,10 +59,11 @@ public:
      void TestBufferOverflow();
      void TestEdits();
      void TestCaseMapWithEdits();
+    void TestCaseMapUTF8WithEdits();
      void TestLongUnicodeString();
  
  private:
-    void assertGreekUpper(const char *s, const char *expected);
+    void assertGreekUpper(const char16_t *s, const char16_t *expected);
      void checkEditsIter(
          const UnicodeString &name, Edits::Iterator ei1, Edits::Iterator ei2,  // two equal iterators
          const EditChange expected[], int32_t expLength, UBool withUnchanged,
@@ -96,6 +97,7 @@ StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha
      TESTCASE_AUTO(TestBufferOverflow);
      TESTCASE_AUTO(TestEdits);
      TESTCASE_AUTO(TestCaseMapWithEdits);
+    TESTCASE_AUTO(TestCaseMapUTF8WithEdits);
      TESTCASE_AUTO(TestLongUnicodeString);
      TESTCASE_AUTO_END;
  }
@@ -629,9 +631,9 @@ StringCaseTest::TestFullCaseFoldingIterator() {
  }
  
  void
-StringCaseTest::assertGreekUpper(const char *s, const char *expected) {
-    UnicodeString s16 = UnicodeString(s).unescape();
-    UnicodeString expected16 = UnicodeString(expected).unescape();
+StringCaseTest::assertGreekUpper(const char16_t *s, const char16_t *expected) {
+    UnicodeString s16(s);
+    UnicodeString expected16(expected);
      UnicodeString msg = UnicodeString("UnicodeString::toUpper/Greek(\"") + s16 + "\")";
      UnicodeString result16(s16);
      result16.toUpper(GREEK_LOCALE_);
@@ -713,86 +715,31 @@ StringCaseTest::assertGreekUpper(const char *s, const char *expected) {
  
  void
  StringCaseTest::TestGreekUpper() {
-    // See UCharacterCaseTest.java for human-readable strings.
-
      // http://bugs.icu-project.org/trac/ticket/5456
-    assertGreekUpper("\\u03AC\\u03B4\\u03B9\\u03BA\\u03BF\\u03C2, "
-                     "\\u03BA\\u03B5\\u03AF\\u03BC\\u03B5\\u03BD\\u03BF, "
-                     "\\u03AF\\u03C1\\u03B9\\u03B4\\u03B1",
-                     "\\u0391\\u0394\\u0399\\u039A\\u039F\\u03A3, "
-                     "\\u039A\\u0395\\u0399\\u039C\\u0395\\u039D\\u039F, "
-                     "\\u0399\\u03A1\\u0399\\u0394\\u0391");
+    assertGreekUpper(u"άδικος, κείμενο, ίριδα", u"ΑΔΙΚΟΣ, ΚΕΙΜΕΝΟ, ΙΡΙΔΑ");
      // https://bugzilla.mozilla.org/show_bug.cgi?id=307039
      // https://bug307039.bmoattachments.org/attachment.cgi?id=194893
-    assertGreekUpper("\\u03A0\\u03B1\\u03C4\\u03AC\\u03C4\\u03B1",
-                     "\\u03A0\\u0391\\u03A4\\u0391\\u03A4\\u0391");
-    assertGreekUpper("\\u0391\\u03AD\\u03C1\\u03B1\\u03C2, "
-                     "\\u039C\\u03C5\\u03C3\\u03C4\\u03AE\\u03C1\\u03B9\\u03BF, "
-                     "\\u03A9\\u03C1\\u03B1\\u03AF\\u03BF",
-                     "\\u0391\\u0395\\u03A1\\u0391\\u03A3, "
-                     "\\u039C\\u03A5\\u03A3\\u03A4\\u0397\\u03A1\\u0399\\u039F, "
-                     "\\u03A9\\u03A1\\u0391\\u0399\\u039F");
-    assertGreekUpper("\\u039C\\u03B1\\u0390\\u03BF\\u03C5, \\u03A0\\u03CC\\u03C1\\u03BF\\u03C2, "
-                     "\\u03A1\\u03CD\\u03B8\\u03BC\\u03B9\\u03C3\\u03B7",
-                     "\\u039C\\u0391\\u03AA\\u039F\\u03A5, \\u03A0\\u039F\\u03A1\\u039F\\u03A3, "
-                     "\\u03A1\\u03A5\\u0398\\u039C\\u0399\\u03A3\\u0397");
-    assertGreekUpper("\\u03B0, \\u03A4\\u03B7\\u03C1\\u03CE, \\u039C\\u03AC\\u03B9\\u03BF\\u03C2",
-                     "\\u03AB, \\u03A4\\u0397\\u03A1\\u03A9, \\u039C\\u0391\\u03AA\\u039F\\u03A3");
-    assertGreekUpper("\\u03AC\\u03C5\\u03BB\\u03BF\\u03C2",
-                     "\\u0391\\u03AB\\u039B\\u039F\\u03A3");
-    assertGreekUpper("\\u0391\\u03AB\\u039B\\u039F\\u03A3",
-                     "\\u0391\\u03AB\\u039B\\u039F\\u03A3");
-    assertGreekUpper("\\u0386\\u03BA\\u03BB\\u03B9\\u03C4\\u03B1 "
-                     "\\u03C1\\u03AE\\u03BC\\u03B1\\u03C4\\u03B1 \\u03AE "
-                     "\\u03AC\\u03BA\\u03BB\\u03B9\\u03C4\\u03B5\\u03C2 "
-                     "\\u03BC\\u03B5\\u03C4\\u03BF\\u03C7\\u03AD\\u03C2",
-                     "\\u0391\\u039A\\u039B\\u0399\\u03A4\\u0391 "
-                     "\\u03A1\\u0397\\u039C\\u0391\\u03A4\\u0391 \\u0397\\u0301 "
-                     "\\u0391\\u039A\\u039B\\u0399\\u03A4\\u0395\\u03A3 "
-                     "\\u039C\\u0395\\u03A4\\u039F\\u03A7\\u0395\\u03A3");
+    assertGreekUpper(u"Πατάτα", u"ΠΑΤΑΤΑ");
+    assertGreekUpper(u"Αέρας, Μυστήριο, Ωραίο", u"ΑΕΡΑΣ, ΜΥΣΤΗΡΙΟ, ΩΡΑΙΟ");
+    assertGreekUpper(u"Μαΐου, Πόρος, Ρύθμιση", u"ΜΑΪΟΥ, ΠΟΡΟΣ, ΡΥΘΜΙΣΗ");
+    assertGreekUpper(u"ΰ, Τηρώ, Μάιος", u"Ϋ, ΤΗΡΩ, ΜΑΪΟΣ");
+    assertGreekUpper(u"άυλος", u"ΑΫΛΟΣ");
+    assertGreekUpper(u"ΑΫΛΟΣ", u"ΑΫΛΟΣ");
+    assertGreekUpper(u"Άκλιτα ρήματα ή άκλιτες μετοχές", u"ΑΚΛΙΤΑ ΡΗΜΑΤΑ Ή ΑΚΛΙΤΕΣ ΜΕΤΟΧΕΣ");
      // http://www.unicode.org/udhr/d/udhr_ell_monotonic.html
-    assertGreekUpper("\\u0395\\u03C0\\u03B5\\u03B9\\u03B4\\u03AE \\u03B7 "
-                     "\\u03B1\\u03BD\\u03B1\\u03B3\\u03BD\\u03CE\\u03C1\\u03B9\\u03C3\\u03B7 "
-                     "\\u03C4\\u03B7\\u03C2 \\u03B1\\u03BE\\u03B9\\u03BF\\u03C0\\u03C1\\u03AD"
-                     "\\u03C0\\u03B5\\u03B9\\u03B1\\u03C2",
-                     "\\u0395\\u03A0\\u0395\\u0399\\u0394\\u0397 \\u0397 "
-                     "\\u0391\\u039D\\u0391\\u0393\\u039D\\u03A9\\u03A1\\u0399\\u03A3\\u0397 "
-                     "\\u03A4\\u0397\\u03A3 \\u0391\\u039E\\u0399\\u039F\\u03A0\\u03A1\\u0395"
-                     "\\u03A0\\u0395\\u0399\\u0391\\u03A3");
-    assertGreekUpper("\\u03BD\\u03BF\\u03BC\\u03B9\\u03BA\\u03BF\\u03CD \\u03AE "
-                     "\\u03B4\\u03B9\\u03B5\\u03B8\\u03BD\\u03BF\\u03CD\\u03C2",
-                     "\\u039D\\u039F\\u039C\\u0399\\u039A\\u039F\\u03A5 \\u0397\\u0301 "
-                     "\\u0394\\u0399\\u0395\\u0398\\u039D\\u039F\\u03A5\\u03A3");
+    assertGreekUpper(u"Επειδή η αναγνώριση της αξιοπρέπειας", u"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ ΤΗΣ ΑΞΙΟΠΡΕΠΕΙΑΣ");
+    assertGreekUpper(u"νομικού ή διεθνούς", u"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ");
      // http://unicode.org/udhr/d/udhr_ell_polytonic.html
-    assertGreekUpper("\\u1F18\\u03C0\\u03B5\\u03B9\\u03B4\\u1F74 \\u1F21 "
-                     "\\u1F00\\u03BD\\u03B1\\u03B3\\u03BD\\u1F7D\\u03C1\\u03B9\\u03C3\\u03B7",
-                     "\\u0395\\u03A0\\u0395\\u0399\\u0394\\u0397 \\u0397 "
-                     "\\u0391\\u039D\\u0391\\u0393\\u039D\\u03A9\\u03A1\\u0399\\u03A3\\u0397");
-    assertGreekUpper("\\u03BD\\u03BF\\u03BC\\u03B9\\u03BA\\u03BF\\u1FE6 \\u1F22 "
-                     "\\u03B4\\u03B9\\u03B5\\u03B8\\u03BD\\u03BF\\u1FE6\\u03C2",
-                     "\\u039D\\u039F\\u039C\\u0399\\u039A\\u039F\\u03A5 \\u0397\\u0301 "
-                     "\\u0394\\u0399\\u0395\\u0398\\u039D\\u039F\\u03A5\\u03A3");
+    assertGreekUpper(u"Ἐπειδὴ ἡ ἀναγνώριση", u"ΕΠΕΙΔΗ Η ΑΝΑΓΝΩΡΙΣΗ");
+    assertGreekUpper(u"νομικοῦ ἢ διεθνοῦς", u"ΝΟΜΙΚΟΥ Ή ΔΙΕΘΝΟΥΣ");
      // From Google bug report
-    assertGreekUpper("\\u039D\\u03AD\\u03BF, "
-                     "\\u0394\\u03B7\\u03BC\\u03B9\\u03BF\\u03C5\\u03C1\\u03B3\\u03AF\\u03B1",
-                     "\\u039D\\u0395\\u039F, "
-                     "\\u0394\\u0397\\u039C\\u0399\\u039F\\u03A5\\u03A1\\u0393\\u0399\\u0391");
+    assertGreekUpper(u"Νέο, Δημιουργία", u"ΝΕΟ, ΔΗΜΙΟΥΡΓΙΑ");
      // http://crbug.com/234797
-    assertGreekUpper("\\u0395\\u03BB\\u03AC\\u03C4\\u03B5 \\u03BD\\u03B1 \\u03C6\\u03AC\\u03C4\\u03B5 "
-                     "\\u03C4\\u03B1 \\u03BA\\u03B1\\u03BB\\u03CD\\u03C4\\u03B5\\u03C1\\u03B1 "
-                     "\\u03C0\\u03B1\\u03CA\\u03B4\\u03AC\\u03BA\\u03B9\\u03B1!",
-                     "\\u0395\\u039B\\u0391\\u03A4\\u0395 \\u039D\\u0391 \\u03A6\\u0391\\u03A4\\u0395 "
-                     "\\u03A4\\u0391 \\u039A\\u0391\\u039B\\u03A5\\u03A4\\u0395\\u03A1\\u0391 "
-                     "\\u03A0\\u0391\\u03AA\\u0394\\u0391\\u039A\\u0399\\u0391!");
-    assertGreekUpper("\\u039C\\u03B1\\u0390\\u03BF\\u03C5, \\u03C4\\u03C1\\u03CC\\u03BB\\u03B5\\u03CA",
-                     "\\u039C\\u0391\\u03AA\\u039F\\u03A5, \\u03A4\\u03A1\\u039F\\u039B\\u0395\\u03AA");
-    assertGreekUpper("\\u03A4\\u03BF \\u03AD\\u03BD\\u03B1 \\u03AE \\u03C4\\u03BF "
-                     "\\u03AC\\u03BB\\u03BB\\u03BF.",
-                     "\\u03A4\\u039F \\u0395\\u039D\\u0391 \\u0397\\u0301 \\u03A4\\u039F "
-                     "\\u0391\\u039B\\u039B\\u039F.");
+    assertGreekUpper(u"Ελάτε να φάτε τα καλύτερα παϊδάκια!", u"ΕΛΑΤΕ ΝΑ ΦΑΤΕ ΤΑ ΚΑΛΥΤΕΡΑ ΠΑΪΔΑΚΙΑ!");
+    assertGreekUpper(u"Μαΐου, τρόλεϊ", u"ΜΑΪΟΥ, ΤΡΟΛΕΪ");
+    assertGreekUpper(u"Το ένα ή το άλλο.", u"ΤΟ ΕΝΑ Ή ΤΟ ΑΛΛΟ.");
      // http://multilingualtypesetting.co.uk/blog/greek-typesetting-tips/
-    assertGreekUpper("\\u03C1\\u03C9\\u03BC\\u03AD\\u03B9\\u03BA\\u03B1",
-                     "\\u03A1\\u03A9\\u039C\\u0395\\u03AA\\u039A\\u0391");
+    assertGreekUpper(u"ρωμέικα", u"ΡΩΜΕΪΚΑ");
  }
  
  void
@@ -939,7 +886,7 @@ void StringCaseTest::checkEditsIter(
          }
      }
      // TODO: remove casts from u"" when merging into trunk
-    UnicodeString msg = UnicodeString(name).append((const UChar *)u" end");
+    UnicodeString msg = UnicodeString(name).append(u" end");
      assertFalse(msg, ei1.next(errorCode));
      assertFalse(msg, ei1.hasChange());
      assertEquals(msg, 0, ei1.oldLength());
@@ -979,10 +926,10 @@ void StringCaseTest::TestEdits() {
              { FALSE, 10003, 10003 },
              { TRUE, 103103, 104013 }
      };
-    checkEditsIter((const UChar *)u"coarse",
+    checkEditsIter(u"coarse",
              edits.getCoarseIterator(), edits.getCoarseIterator(),
              coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), TRUE, errorCode);
-    checkEditsIter((const UChar *)u"coarse changes",
+    checkEditsIter(u"coarse changes",
              edits.getCoarseChangesIterator(), edits.getCoarseChangesIterator(),
              coarseExpectedChanges, UPRV_LENGTHOF(coarseExpectedChanges), FALSE, errorCode);
  
@@ -996,10 +943,10 @@ void StringCaseTest::TestEdits() {
              { TRUE, 3000, 4000 },
              { TRUE, 100000, 100000 }
      };
-    checkEditsIter((const UChar *)u"fine",
+    checkEditsIter(u"fine",
              edits.getFineIterator(), edits.getFineIterator(),
              fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), TRUE, errorCode);
-    checkEditsIter((const UChar *)u"fine changes",
+    checkEditsIter(u"fine changes",
              edits.getFineChangesIterator(), edits.getFineChangesIterator(),
              fineExpectedChanges, UPRV_LENGTHOF(fineExpectedChanges), FALSE, errorCode);
  
@@ -1016,23 +963,23 @@ void StringCaseTest::TestCaseMapWithEdits() {
      Edits edits;
  
      int32_t length = CaseMap::toLower("tr", UCASEMAP_OMIT_UNCHANGED_TEXT,
-                                      (const UChar *)u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
-    assertEquals((const UChar *)u"toLower(Istanbul)", UnicodeString((const UChar *)u"ıb"), UnicodeString(TRUE, dest, length));
+                                      u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
+    assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"), UnicodeString(TRUE, dest, length));
      static const EditChange lowerExpectedChanges[] = {
              { TRUE, 1, 1 },
              { FALSE, 4, 4 },
              { TRUE, 1, 1 },
              { FALSE, 2, 2 }
      };
-    checkEditsIter((const UChar *)u"toLower(Istanbul)",
+    checkEditsIter(u"toLower(IstanBul)",
              edits.getFineIterator(), edits.getFineIterator(),
              lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges),
              TRUE, errorCode);
  
      edits.reset();
      length = CaseMap::toUpper("el", UCASEMAP_OMIT_UNCHANGED_TEXT,
-                              (const UChar *)u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
-    assertEquals((const UChar *)u"toUpper(Πατάτα)", UnicodeString((const UChar *)u"ΑΤΑΤΑ"), UnicodeString(TRUE, dest, length));
+                              u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
+    assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"), UnicodeString(TRUE, dest, length));
      static const EditChange upperExpectedChanges[] = {
              { FALSE, 1, 1 },
              { TRUE, 1, 1 },
@@ -1041,7 +988,7 @@ void StringCaseTest::TestCaseMapWithEdits() {
              { TRUE, 1, 1 },
              { TRUE, 1, 1 }
      };
-    checkEditsIter((const UChar *)u"toUpper(Πατάτα)",
+    checkEditsIter(u"toUpper(Πατάτα)",
              edits.getFineIterator(), edits.getFineIterator(),
              upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges),
              TRUE, errorCode);
@@ -1051,23 +998,23 @@ void StringCaseTest::TestCaseMapWithEdits() {
                                UCASEMAP_OMIT_UNCHANGED_TEXT |
                                U_TITLECASE_NO_BREAK_ADJUSTMENT |
                                U_TITLECASE_NO_LOWERCASE,
-                              NULL, (const UChar *)u"IjssEL IglOo", 12,
+                              NULL, u"IjssEL IglOo", 12,
                                dest, UPRV_LENGTHOF(dest), &edits, errorCode);
-    assertEquals((const UChar *)u"toTitle(IjssEL IglOo)", UnicodeString((const UChar *)u"J"), UnicodeString(TRUE, dest, length));
+    assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"), UnicodeString(TRUE, dest, length));
      static const EditChange titleExpectedChanges[] = {
              { FALSE, 1, 1 },
              { TRUE, 1, 1 },
              { FALSE, 10, 10 }
      };
-    checkEditsIter((const UChar *)u"toTitle(IjssEL IglOo)",
+    checkEditsIter(u"toTitle(IjssEL IglOo)",
              edits.getFineIterator(), edits.getFineIterator(),
              titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges),
              TRUE, errorCode);
  
      edits.reset();
      length = CaseMap::fold(UCASEMAP_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
-                           (const UChar *)u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
-    assertEquals((const UChar *)u"foldCase(IßtanBul)", UnicodeString((const UChar *)u"ıssb"), UnicodeString(TRUE, dest, length));
+                           u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
+    assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"), UnicodeString(TRUE, dest, length));
      static const EditChange foldExpectedChanges[] = {
              { TRUE, 1, 1 },
              { TRUE, 1, 2 },
@@ -1075,7 +1022,82 @@ void StringCaseTest::TestCaseMapWithEdits() {
              { TRUE, 1, 1 },
              { FALSE, 2, 2 }
      };
-    checkEditsIter((const UChar *)u"foldCase(IßtanBul)",
+    checkEditsIter(u"foldCase(IßtanBul)",
+            edits.getFineIterator(), edits.getFineIterator(),
+            foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges),
+            TRUE, errorCode);
+}
+
+void StringCaseTest::TestCaseMapUTF8WithEdits() {
+    IcuTestErrorCode errorCode(*this, "TestEdits");
+    char dest[50];
+    Edits edits;
+
+    int32_t length = CaseMap::utf8ToLower("tr", UCASEMAP_OMIT_UNCHANGED_TEXT,
+                                          u8"IstanBul", 8, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
+    assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"),
+                 UnicodeString::fromUTF8(StringPiece(dest, length)));
+    static const EditChange lowerExpectedChanges[] = {
+            { TRUE, 1, 2 },
+            { FALSE, 4, 4 },
+            { TRUE, 1, 1 },
+            { FALSE, 2, 2 }
+    };
+    checkEditsIter(u"toLower(IstanBul)",
+            edits.getFineIterator(), edits.getFineIterator(),
+            lowerExpectedChanges, UPRV_LENGTHOF(lowerExpectedChanges),
+            TRUE, errorCode);
+
+    edits.reset();
+    length = CaseMap::utf8ToUpper("el", UCASEMAP_OMIT_UNCHANGED_TEXT,
+                                  u8"Πατάτα", 6 * 2, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
+    assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"),
+                 UnicodeString::fromUTF8(StringPiece(dest, length)));
+    static const EditChange upperExpectedChanges[] = {
+            { FALSE, 2, 2 },
+            { TRUE, 2, 2 },
+            { TRUE, 2, 2 },
+            { TRUE, 2, 2 },
+            { TRUE, 2, 2 },
+            { TRUE, 2, 2 }
+    };
+    checkEditsIter(u"toUpper(Πατάτα)",
+            edits.getFineIterator(), edits.getFineIterator(),
+            upperExpectedChanges, UPRV_LENGTHOF(upperExpectedChanges),
+            TRUE, errorCode);
+
+    edits.reset();
+    length = CaseMap::utf8ToTitle("nl",
+                                  UCASEMAP_OMIT_UNCHANGED_TEXT |
+                                  U_TITLECASE_NO_BREAK_ADJUSTMENT |
+                                  U_TITLECASE_NO_LOWERCASE,
+                                  NULL, u8"IjssEL IglOo", 12,
+                                  dest, UPRV_LENGTHOF(dest), &edits, errorCode);
+    assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"),
+                 UnicodeString::fromUTF8(StringPiece(dest, length)));
+    static const EditChange titleExpectedChanges[] = {
+            { FALSE, 1, 1 },
+            { TRUE, 1, 1 },
+            { FALSE, 10, 10 }
+    };
+    checkEditsIter(u"toTitle(IjssEL IglOo)",
+            edits.getFineIterator(), edits.getFineIterator(),
+            titleExpectedChanges, UPRV_LENGTHOF(titleExpectedChanges),
+            TRUE, errorCode);
+
+    edits.reset();
+    length = CaseMap::utf8Fold(UCASEMAP_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I,
+                               u8"IßtanBul", 1 + 2 + 6, dest, UPRV_LENGTHOF(dest), &edits, errorCode);
+    assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"),
+                 UnicodeString::fromUTF8(StringPiece(dest, length)));
+    static const EditChange foldExpectedChanges[] = {
+            { TRUE, 1, 2 },
+            { TRUE, 2, 2 },
+            { FALSE, 3, 3 },
+            { TRUE, 1, 1 },
+            { FALSE, 2, 2 }
+    };
+    checkEditsIter(u"foldCase(IßtanBul)",
              edits.getFineIterator(), edits.getFineIterator(),
              foldExpectedChanges, UPRV_LENGTHOF(foldExpectedChanges),
              TRUE, errorCode);
author	Markus Scherer <markus.icu@gmail.com>
	Tue, 14 Mar 2017 23:55:29 +0000 (23:55 +0000)
committer	Markus Scherer <markus.icu@gmail.com>
	Tue, 14 Mar 2017 23:55:29 +0000 (23:55 +0000)
icu4c/source/common/ucasemap.cpp		patch \| blob \| history
icu4c/source/common/ucasemap_imp.h		patch \| blob \| history
icu4c/source/common/ucasemap_titlecase_brkiter.cpp		patch \| blob \| history
icu4c/source/common/unicode/casemap.h		patch \| blob \| history
icu4c/source/common/ustrcase.cpp		patch \| blob \| history
icu4c/source/test/intltest/strcase.cpp		patch \| blob \| history