ICU-12410 start to port class CaseMap to Java; make case mapping impl code work with...

author Markus Scherer <markus.icu@gmail.com>

Wed, 25 Jan 2017 23:52:55 +0000 (23:52 +0000)

committer Markus Scherer <markus.icu@gmail.com>

Wed, 25 Jan 2017 23:52:55 +0000 (23:52 +0000)
author Markus Scherer <markus.icu@gmail.com>
Wed, 25 Jan 2017 23:52:55 +0000 (23:52 +0000)
committer Markus Scherer <markus.icu@gmail.com>
Wed, 25 Jan 2017 23:52:55 +0000 (23:52 +0000)
diff --git a/icu4c/source/common/ustrcase.cpp b/icu4c/source/common/ustrcase.cpp

index d213d039df18a243b53edd81946040e8672a378a..2a44f6eba4c5dbcb2b443dc968ddfcd6ea7520c6 100644 (file)
--- a/icu4c/source/common/ustrcase.cpp
+++ b/icu4c/source/common/ustrcase.cpp
@@ -1177,7 +1177,7 @@ UBool isFollowedByCasedLetter(const UCaseProps *csp, const UChar *s, int32_t i,
   * for each character.
   * TODO: Try to re-consolidate one way or another with the non-Greek function.
   */
-int32_t toUpper(int32_t caseLocale, uint32_t options,
+int32_t toUpper(uint32_t options,
                  UChar *dest, int32_t destCapacity,
                  const UChar *src, int32_t srcLength,
                  Edits *edits,
@@ -1305,7 +1305,7 @@ int32_t toUpper(int32_t caseLocale, uint32_t options,
              }
          } else {
              const UChar *s;
-            c=ucase_toFullUpper(NULL, c, NULL, NULL, &s, caseLocale);
+            c=ucase_toFullUpper(NULL, c, NULL, NULL, &s, UCASE_LOC_GREEK);
              destIndex = appendResult(dest, destIndex, destCapacity, c, s,
                                       nextIndex - i, options, edits);
              if (destIndex < 0) {
@@ -1349,7 +1349,7 @@ ustrcase_internalToUpper(int32_t caseLocale, uint32_t options, UCASEMAP_BREAK_IT
                           icu::Edits *edits,
                           UErrorCode &errorCode) {
      if (caseLocale == UCASE_LOC_GREEK) {
-        return GreekUpper::toUpper(caseLocale, options, dest, destCapacity, src, srcLength, edits, errorCode);
+        return GreekUpper::toUpper(options, dest, destCapacity, src, srcLength, edits, errorCode);
      }
      UCaseContext csc=UCASECONTEXT_INITIALIZER;
      csc.p=(void *)src;
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/CaseMap.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/CaseMap.java

index c9b0206747abd73b4a3177d5f22afb86e655f3b7..8ec51544b6f23737797cc131e2b3520863b5d3ea 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/CaseMap.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/CaseMap.java
@@ -2,6 +2,10 @@
  // License & terms of use: http://www.unicode.org/copyright.html#License
  package com.ibm.icu.impl;
  
+import java.io.IOException;
+
+import com.ibm.icu.text.Edits;
+import com.ibm.icu.util.ICUUncheckedIOException;
  import com.ibm.icu.util.ULocale;
  
  // TODO: rename to CaseMapImpl
@@ -13,11 +17,11 @@ public final class CaseMap {
      public static final class StringContextIterator implements UCaseProps.ContextIterator {
          /**
           * Constructor.
-         * @param s String to iterate over.
+         * @param src String to iterate over.
           */
-        public StringContextIterator(String s) {
-            this.s=s;
-            limit=s.length();
+        public StringContextIterator(CharSequence src) {
+            this.s=src;
+            limit=src.length();
              cpStart=cpLimit=index=0;
              dir=0;
          }
@@ -61,7 +65,7 @@ public final class CaseMap {
          public int nextCaseMapCP() {
              cpStart=cpLimit;
              if(cpLimit<limit) {
-                int c=s.codePointAt(cpLimit);
+                int c=Character.codePointAt(s, cpLimit);
                  cpLimit+=Character.charCount(c);
                  return c;
              } else {
@@ -85,6 +89,10 @@ public final class CaseMap {
              return cpLimit;
          }
  
+        public int getCPLength() {
+            return cpLimit-cpStart;
+        }
+
          // implement UCaseProps.ContextIterator
          // The following code is not used anywhere in this private class
          @Override
@@ -109,11 +117,11 @@ public final class CaseMap {
              int c;
  
              if(dir>0 && index<s.length()) {
-                c=s.codePointAt(index);
+                c=Character.codePointAt(s, index);
                  index+=Character.charCount(c);
                  return c;
              } else if(dir<0 && index>0) {
-                c=s.codePointBefore(index);
+                c=Character.codePointBefore(s, index);
                  index-=Character.charCount(c);
                  return c;
              }
@@ -121,44 +129,107 @@ public final class CaseMap {
          }
  
          // variables
-        protected String s;
+        protected CharSequence s;
          protected int index, limit, cpStart, cpLimit;
          protected int dir; // 0=initial state  >0=forward  <0=backward
      }
  
-    /** Appends a full case mapping result, see {@link UCaseProps#MAX_STRING_LENGTH}. */
-    private static final void appendResult(int c, StringBuilder result) {
+    private static int appendCodePoint(Appendable a, int c) throws IOException {
+        if (c <= Character.MAX_VALUE) {
+            a.append((char)c);
+            return 1;
+        } else {
+            a.append((char)(0xd7c0 + (c >> 10)));
+            a.append((char)(Character.MIN_LOW_SURROGATE + (c & 0x3ff)));
+            return 2;
+        }
+    }
+
+    /**
+     * Appends a full case mapping result, see {@link UCaseProps#MAX_STRING_LENGTH}.
+     * @throws IOException if appending to dest fails
+     */
+    private static void appendResult(int result, Appendable dest,
+            int cpLength, int options, Edits edits) throws IOException {
          // Decode the result.
-        if (c < 0) {
+        if (result < 0) {
              // (not) original code point
-            result.appendCodePoint(~c);
-        } else if (c <= UCaseProps.MAX_STRING_LENGTH) {
+            if (edits != null) {
+                edits.addUnchanged(cpLength);
+                // TODO: remove package path
+                if ((options & com.ibm.icu.text.CaseMap.OMIT_UNCHANGED_TEXT) != 0) {
+                    return;
+                }
+            }
+            appendCodePoint(dest, ~result);
+        } else if (result <= UCaseProps.MAX_STRING_LENGTH) {
              // The mapping has already been appended to result.
+            if (edits != null) {
+                edits.addReplace(cpLength, result);
+            }
          } else {
              // Append the single-code point mapping.
-            result.appendCodePoint(c);
+            int length = appendCodePoint(dest, result);
+            if (edits != null) {
+                edits.addReplace(cpLength, length);
+            }
          }
      }
  
-    // TODO: Move the other string case mapping functions from UCharacter to here, too.
-
-    public static String toUpper(ULocale locale, String str) {
-        if (locale == null) {
-            locale = ULocale.getDefault();
+    private static final void appendUnchanged(CharSequence src, int start, int length,
+            Appendable dest, int options, Edits edits) throws IOException {
+        if (length > 0) {
+            if (edits != null) {
+                edits.addUnchanged(length);
+                // TODO: remove package path
+                if ((options & com.ibm.icu.text.CaseMap.OMIT_UNCHANGED_TEXT) != 0) {
+                    return;
+                }
+            }
+            dest.append(src, start, start + length);
          }
-        int[] locCache = new int[] { UCaseProps.getCaseLocale(locale, null) };
-        if (locCache[0] == UCaseProps.LOC_GREEK) {
-            return GreekUpper.toUpper(str, locCache);
+    }
+
+    public static <A extends Appendable> A toLower(int caseLocale, int options,
+            CharSequence src, A dest, Edits edits) {
+        try {
+            if (edits != null) {
+                edits.reset();
+            }
+            StringContextIterator iter = new StringContextIterator(src);
+            int c;
+            while ((c = iter.nextCaseMapCP()) >= 0) {
+                c = UCaseProps.INSTANCE.toFullLower(c, iter, dest, caseLocale);
+                appendResult(c, dest, iter.getCPLength(), options, edits);
+            }
+            return dest;
+        } catch (IOException e) {
+            throw new ICUUncheckedIOException(e);
          }
+    }
  
-        StringContextIterator iter = new StringContextIterator(str);
-        StringBuilder result = new StringBuilder(str.length());
-        int c;
-        while((c=iter.nextCaseMapCP())>=0) {
-            c = UCaseProps.INSTANCE.toFullUpper(c, iter, result, locale, locCache);
-            appendResult(c, result);
+    public static String toUpper(ULocale locale, String str) {
+        try {
+            int options = 0; Edits edits = null;  // TODO
+            if (locale == null) {
+                locale = ULocale.getDefault();
+            }
+            int caseLocale = UCaseProps.getCaseLocale(locale);
+            if (caseLocale == UCaseProps.LOC_GREEK) {
+                return GreekUpper.toUpper(str);
+            }
+
+            StringContextIterator iter = new StringContextIterator(str);
+            StringBuilder result = new StringBuilder(str.length());
+            int c;
+            while((c=iter.nextCaseMapCP())>=0) {
+                c = UCaseProps.INSTANCE.toFullUpper(c, iter, result, caseLocale);
+                appendResult(c, result, iter.getCPLength(), options, edits);
+            }
+            return result.toString();
+        } catch (IOException e) {
+            throw new ICUUncheckedIOException(e);
          }
-        return result.toString();
      }
  
      private static final class GreekUpper {
@@ -662,8 +733,10 @@ public final class CaseMap {
           * TODO: Try to re-consolidate one way or another with the non-Greek function.
           *
           * <p>Keep this consistent with the C++ versions in ustrcase.cpp (UTF-16) and ucasemap.cpp (UTF-8).
+         * @throws IOException
           */
-        private static String toUpper(CharSequence s, int[] locCache) {
+        private static String toUpper(CharSequence s) throws IOException {
+            int options = 0; Edits edits = null;  // TODO
              StringBuilder result = new StringBuilder(s.length());
              int state = 0;
              for (int i = 0; i < s.length();) {
@@ -747,8 +820,8 @@ public final class CaseMap {
                          --numYpogegrammeni;
                      }
                  } else {
-                    c = UCaseProps.INSTANCE.toFullUpper(c, null, result, null, locCache);
-                    appendResult(c, result);
+                    c = UCaseProps.INSTANCE.toFullUpper(c, null, result, UCaseProps.LOC_GREEK);
+                    appendResult(c, result, nextIndex - i, options, edits);
                  }
                  i = nextIndex;
                  state = nextState;
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/UCaseProps.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/UCaseProps.java

index 927cdc03cdd71e5a1e1f6a0da1e4d4564791c7fc..1da49283ac58d285edd34d1e70cd9d75aa33e899 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/impl/UCaseProps.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/UCaseProps.java
@@ -24,6 +24,7 @@ package com.ibm.icu.impl;
  import java.io.IOException;
  import java.nio.ByteBuffer;
  import java.util.Iterator;
+import java.util.Locale;
  
  import com.ibm.icu.lang.UCharacter;
  import com.ibm.icu.lang.UProperty;
@@ -71,7 +72,7 @@ public final class UCaseProps {
          // read exceptions[]
          count=indexes[IX_EXC_LENGTH];
          if(count>0) {
-            exceptions=ICUBinary.getChars(bytes, count, 0);
+            exceptions=ICUBinary.getString(bytes, count, 0);
          }
  
          // read unfold[]
@@ -150,7 +151,7 @@ public final class UCaseProps {
       *
       * @param excWord (in) initial exceptions word
       * @param index (in) desired slot index
-     * @param excOffset (in) offset into exceptions[] after excWord=exceptions[excOffset++];
+     * @param excOffset (in) offset into exceptions[] after excWord=exceptions.charAt(excOffset++);
       * @return bits 31..0: slot value
       *             63..32: modified excOffset, moved to the last char of the value, use +1 for beginning of next slot
       */
@@ -158,11 +159,11 @@ public final class UCaseProps {
          long value;
          if((excWord&EXC_DOUBLE_SLOTS)==0) {
              excOffset+=slotOffset(excWord, index);
-            value=exceptions[excOffset];
+            value=exceptions.charAt(excOffset);
          } else {
              excOffset+=2*slotOffset(excWord, index);
-            value=exceptions[excOffset++];
-            value=(value<<16)|exceptions[excOffset];
+            value=exceptions.charAt(excOffset++);
+            value=(value<<16)|exceptions.charAt(excOffset);
          }
          return value |((long)excOffset<<32);
      }
@@ -172,11 +173,11 @@ public final class UCaseProps {
          int value;
          if((excWord&EXC_DOUBLE_SLOTS)==0) {
              excOffset+=slotOffset(excWord, index);
-            value=exceptions[excOffset];
+            value=exceptions.charAt(excOffset);
          } else {
              excOffset+=2*slotOffset(excWord, index);
-            value=exceptions[excOffset++];
-            value=(value<<16)|exceptions[excOffset];
+            value=exceptions.charAt(excOffset++);
+            value=(value<<16)|exceptions.charAt(excOffset);
          }
          return value;
      }
@@ -191,7 +192,7 @@ public final class UCaseProps {
              }
          } else {
              int excOffset=getExceptionsOffset(props);
-            int excWord=exceptions[excOffset++];
+            int excWord=exceptions.charAt(excOffset++);
              if(hasSlot(excWord, EXC_LOWER)) {
                  c=getSlotValue(excWord, EXC_LOWER, excOffset);
              }
@@ -207,7 +208,7 @@ public final class UCaseProps {
              }
          } else {
              int excOffset=getExceptionsOffset(props);
-            int excWord=exceptions[excOffset++];
+            int excWord=exceptions.charAt(excOffset++);
              if(hasSlot(excWord, EXC_UPPER)) {
                  c=getSlotValue(excWord, EXC_UPPER, excOffset);
              }
@@ -223,7 +224,7 @@ public final class UCaseProps {
              }
          } else {
              int excOffset=getExceptionsOffset(props);
-            int excWord=exceptions[excOffset++];
+            int excWord=exceptions.charAt(excOffset++);
              int index;
              if(hasSlot(excWord, EXC_TITLE)) {
                  index=EXC_TITLE;
@@ -291,7 +292,7 @@ public final class UCaseProps {
               */
              int excOffset0, excOffset=getExceptionsOffset(props);
              int closureOffset;
-            int excWord=exceptions[excOffset++];
+            int excWord=exceptions.charAt(excOffset++);
              int index, closureLength, fullLength, length;
  
              excOffset0=excOffset;
@@ -334,7 +335,7 @@ public final class UCaseProps {
                  /* add the full case folding string */
                  length=fullLength&0xf;
                  if(length!=0) {
-                    set.add(new String(exceptions, excOffset, length));
+                    set.add(exceptions.substring(excOffset, excOffset+length));
                      excOffset+=length;
                  }
  
@@ -348,8 +349,9 @@ public final class UCaseProps {
              }
  
              /* add each code point in the closure string */
-            for(index=0; index<closureLength; index+=UTF16.getCharCount(c)) {
-                c=UTF16.charAt(exceptions, closureOffset, exceptions.length, index);
+            int limit=closureOffset+closureLength;
+            for(index=closureOffset; index<limit; index+=UTF16.getCharCount(c)) {
+                c=exceptions.codePointAt(index);
                  set.add(c);
              }
          }
@@ -468,7 +470,7 @@ public final class UCaseProps {
          if(!propsHasException(props)) {
              return props&DOT_MASK;
          } else {
-            return (exceptions[getExceptionsOffset(props)]>>EXC_DOT_SHIFT)&DOT_MASK;
+            return (exceptions.charAt(getExceptionsOffset(props))>>EXC_DOT_SHIFT)&DOT_MASK;
          }
      }
  
@@ -605,38 +607,44 @@ public final class UCaseProps {
       */
      public static final int MAX_STRING_LENGTH=0x1f;
  
-    private static final int LOC_UNKNOWN=0;
-    private static final int LOC_ROOT=1;
+    // private static final int LOC_UNKNOWN=0;
+    public static final int LOC_ROOT=1;
      private static final int LOC_TURKISH=2;
      private static final int LOC_LITHUANIAN=3;
      static final int LOC_GREEK=4;
  
-    /*
-     * Checks and caches the type of locale ID as it is relevant for case mapping.
-     * If the locCache is not null, then it must be initialized with locCache[0]=0 .
-     */
-    static final int getCaseLocale(ULocale locale, int[] locCache) {
-        int result;
-
-        if(locCache!=null && (result=locCache[0])!=LOC_UNKNOWN) {
-            return result;
-        }
-
-        result=LOC_ROOT;
-
-        String language=locale.getLanguage();
-        if(language.equals("tr") || language.equals("tur") || language.equals("az") || language.equals("aze")) {
-            result=LOC_TURKISH;
-        } else if(language.equals("el") || language.equals("ell")) {
-            result=LOC_GREEK;
-        } else if(language.equals("lt") || language.equals("lit")) {
-            result=LOC_LITHUANIAN;
-        }
-
-        if(locCache!=null) {
-            locCache[0]=result;
+    public static final int getCaseLocale(Locale locale) {
+        return getCaseLocale(locale.getLanguage());
+    }
+    public static final int getCaseLocale(ULocale locale) {
+        return getCaseLocale(locale.getLanguage());
+    }
+    /** Accepts both 2- and 3-letter language subtags. */
+    private static final int getCaseLocale(String language) {
+        // Check the subtag length to reduce the number of comparisons
+        // for locales without special behavior.
+        // Fastpath for English "en" which is often used for default (=root locale) case mappings,
+        // and for Chinese "zh": Very common but no special case mapping behavior.
+        if(language.length()==2) {
+            if(language.equals("en") || language.charAt(0)>'t') {
+                return LOC_ROOT;
+            } else if(language.equals("tr") || language.equals("az")) {
+                return LOC_TURKISH;
+            } else if(language.equals("el")) {
+                return LOC_GREEK;
+            } else if(language.equals("lt")) {
+                return LOC_LITHUANIAN;
+            }
+        } else if(language.length()==3) {
+            if(language.equals("tur") || language.equals("aze")) {
+                return LOC_TURKISH;
+            } else if(language.equals("ell")) {
+                return LOC_GREEK;
+            } else if(language.equals("lit")) {
+                return LOC_LITHUANIAN;
+            }
          }
-        return result;
+        return LOC_ROOT;
      }
  
      /* Is followed by {case-ignorable}* cased  ? (dir determines looking forward/backward) */
@@ -797,19 +805,14 @@ public final class UCaseProps {
       *             See ContextIterator for details.
       *             If iter==null then a context-independent result is returned.
       * @param out If the mapping result is a string, then it is appended to out.
-     * @param locale Locale ID for locale-dependent mappings.
-     * @param locCache Initialize locCache[0] to 0; may be used to cache the result of parsing
-     *                 the locale ID for subsequent calls.
-     *                 Can be null.
+     * @param caseLocale Case locale value (one of the LOC_ constants) from getCaseLocale().
       * @return Output code point or string length, see MAX_STRING_LENGTH.
       *
       * @see ContextIterator
       * @see #MAX_STRING_LENGTH
       * @internal
       */
-    public final int toFullLower(int c, ContextIterator iter,
-                                 StringBuilder out,
-                                 ULocale locale, int[] locCache) {
+    public final int toFullLower(int c, ContextIterator iter, Appendable out, int caseLocale) {
          int result, props;
  
          result=c;
@@ -820,22 +823,20 @@ public final class UCaseProps {
              }
          } else {
              int excOffset=getExceptionsOffset(props), excOffset2;
-            int excWord=exceptions[excOffset++];
+            int excWord=exceptions.charAt(excOffset++);
              int full;
  
              excOffset2=excOffset;
  
              if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
                  /* use hardcoded conditions and mappings */
-                int loc=getCaseLocale(locale, locCache);
-
                  /*
                   * Test for conditional mappings first
                   *   (otherwise the unconditional default mappings are always taken),
                   * then test for characters that have unconditional mappings in SpecialCasing.txt,
                   * then get the UnicodeData.txt mappings.
                   */
-                if( loc==LOC_LITHUANIAN &&
+                if( caseLocale==LOC_LITHUANIAN &&
                          /* base characters, find accents above */
                          (((c==0x49 || c==0x4a || c==0x12e) &&
                              isFollowedByMoreAbove(iter)) ||
@@ -858,30 +859,34 @@ public final class UCaseProps {
                          00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
                          0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
                       */
-                    switch(c) {
-                    case 0x49:  /* LATIN CAPITAL LETTER I */
-                        out.append(iDot);
-                        return 2;
-                    case 0x4a:  /* LATIN CAPITAL LETTER J */
-                        out.append(jDot);
-                        return 2;
-                    case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
-                        out.append(iOgonekDot);
-                        return 2;
-                    case 0xcc:  /* LATIN CAPITAL LETTER I WITH GRAVE */
-                        out.append(iDotGrave);
-                        return 3;
-                    case 0xcd:  /* LATIN CAPITAL LETTER I WITH ACUTE */
-                        out.append(iDotAcute);
-                        return 3;
-                    case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
-                        out.append(iDotTilde);
-                        return 3;
-                    default:
-                        return 0; /* will not occur */
+                    try {
+                        switch(c) {
+                        case 0x49:  /* LATIN CAPITAL LETTER I */
+                            out.append(iDot);
+                            return 2;
+                        case 0x4a:  /* LATIN CAPITAL LETTER J */
+                            out.append(jDot);
+                            return 2;
+                        case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
+                            out.append(iOgonekDot);
+                            return 2;
+                        case 0xcc:  /* LATIN CAPITAL LETTER I WITH GRAVE */
+                            out.append(iDotGrave);
+                            return 3;
+                        case 0xcd:  /* LATIN CAPITAL LETTER I WITH ACUTE */
+                            out.append(iDotAcute);
+                            return 3;
+                        case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
+                            out.append(iDotTilde);
+                            return 3;
+                        default:
+                            return 0; /* will not occur */
+                        }
+                    } catch (IOException e) {
+                        throw new ICUUncheckedIOException(e);
                      }
                  /* # Turkish and Azeri */
-                } else if(loc==LOC_TURKISH && c==0x130) {
+                } else if(caseLocale==LOC_TURKISH && c==0x130) {
                      /*
                          # I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
                          # The following rules handle those cases.
@@ -890,7 +895,7 @@ public final class UCaseProps {
                          0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE
                       */
                      return 0x69;
-                } else if(loc==LOC_TURKISH && c==0x307 && isPrecededBy_I(iter)) {
+                } else if(caseLocale==LOC_TURKISH && c==0x307 && isPrecededBy_I(iter)) {
                      /*
                          # When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
                          # This matches the behavior of the canonically equivalent I-dot_above
@@ -899,7 +904,7 @@ public final class UCaseProps {
                          0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
                       */
                      return 0; /* remove the dot (continue without output) */
-                } else if(loc==LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter)) {
+                } else if(caseLocale==LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter)) {
                      /*
                          # When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
  
@@ -913,8 +918,12 @@ public final class UCaseProps {
  
                          0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
                       */
-                    out.append(iDot);
-                    return 2;
+                    try {
+                        out.append(iDot);
+                        return 2;
+                    } catch (IOException e) {
+                        throw new ICUUncheckedIOException(e);
+                    }
                  } else if(  c==0x3a3 &&
                              !isFollowedByCasedLetter(iter, 1) &&
                              isFollowedByCasedLetter(iter, -1) /* -1=preceded */
@@ -936,11 +945,15 @@ public final class UCaseProps {
                      /* start of full case mapping strings */
                      excOffset=(int)(value>>32)+1;
  
-                    /* set the output pointer to the lowercase mapping */
-                    out.append(exceptions, excOffset, full);
+                    try {
+                        // append the lowercase mapping
+                        out.append(exceptions, excOffset, excOffset+full);
  
-                    /* return the string length */
-                    return full;
+                        /* return the string length */
+                        return full;
+                    } catch (IOException e) {
+                        throw new ICUUncheckedIOException(e);
+                    }
                  }
              }
  
@@ -954,8 +967,8 @@ public final class UCaseProps {
  
      /* internal */
      private final int toUpperOrTitle(int c, ContextIterator iter,
-                                     StringBuilder out,
-                                     ULocale locale, int[] locCache,
+                                     Appendable out,
+                                     int loc,
                                       boolean upperNotTitle) {
          int result;
          int props;
@@ -968,15 +981,13 @@ public final class UCaseProps {
              }
          } else {
              int excOffset=getExceptionsOffset(props), excOffset2;
-            int excWord=exceptions[excOffset++];
+            int excWord=exceptions.charAt(excOffset++);
              int full, index;
  
              excOffset2=excOffset;
  
              if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
                  /* use hardcoded conditions and mappings */
-                int loc=getCaseLocale(locale, locCache);
-
                  if(loc==LOC_TURKISH && c==0x69) {
                      /*
                          # Turkish and Azeri
@@ -1026,11 +1037,15 @@ public final class UCaseProps {
                  }
  
                  if(full!=0) {
-                    /* set the output pointer to the result string */
-                    out.append(exceptions, excOffset, full);
-
-                    /* return the string length */
-                    return full;
+                    try {
+                        // append the result string
+                        out.append(exceptions, excOffset, excOffset+full);
+
+                        /* return the string length */
+                        return full;
+                    } catch (IOException e) {
+                        throw new ICUUncheckedIOException(e);
+                    }
                  }
              }
  
@@ -1049,15 +1064,15 @@ public final class UCaseProps {
      }
  
      public final int toFullUpper(int c, ContextIterator iter,
-                                 StringBuilder out,
-                                 ULocale locale, int[] locCache) {
-        return toUpperOrTitle(c, iter, out, locale, locCache, true);
+                                 Appendable out,
+                                 int caseLocale) {
+        return toUpperOrTitle(c, iter, out, caseLocale, true);
      }
  
      public final int toFullTitle(int c, ContextIterator iter,
-                                 StringBuilder out,
-                                 ULocale locale, int[] locCache) {
-        return toUpperOrTitle(c, iter, out, locale, locCache, false);
+                                 Appendable out,
+                                 int caseLocale) {
+        return toUpperOrTitle(c, iter, out, caseLocale, false);
      }
  
      /* case folding ------------------------------------------------------------- */
@@ -1117,7 +1132,7 @@ public final class UCaseProps {
              }
          } else {
              int excOffset=getExceptionsOffset(props);
-            int excWord=exceptions[excOffset++];
+            int excWord=exceptions.charAt(excOffset++);
              int index;
              if((excWord&EXC_CONDITIONAL_FOLD)!=0) {
                  /* special case folding mappings, hardcoded */
@@ -1168,7 +1183,7 @@ public final class UCaseProps {
       * together in a way that they still fold to common result strings.
       */
  
-    public final int toFullFolding(int c, StringBuilder out, int options) {
+    public final int toFullFolding(int c, Appendable out, int options) {
          int result;
          int props;
  
@@ -1180,7 +1195,7 @@ public final class UCaseProps {
              }
          } else {
              int excOffset=getExceptionsOffset(props), excOffset2;
-            int excWord=exceptions[excOffset++];
+            int excWord=exceptions.charAt(excOffset++);
              int full, index;
  
              excOffset2=excOffset;
@@ -1194,8 +1209,12 @@ public final class UCaseProps {
                          return 0x69;
                      } else if(c==0x130) {
                          /* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
-                        out.append(iDot);
-                        return 2;
+                        try {
+                            out.append(iDot);
+                            return 2;
+                        } catch (IOException e) {
+                            throw new ICUUncheckedIOException(e);
+                        }
                      }
                  } else {
                      /* Turkic mappings */
@@ -1219,11 +1238,15 @@ public final class UCaseProps {
                  full=(full>>4)&0xf;
  
                  if(full!=0) {
-                    /* set the output pointer to the result string */
-                    out.append(exceptions, excOffset, full);
-
-                    /* return the string length */
-                    return full;
+                    try {
+                        // append the result string
+                        out.append(exceptions, excOffset, excOffset+full);
+
+                        /* return the string length */
+                        return full;
+                    } catch (IOException e) {
+                        throw new ICUUncheckedIOException(e);
+                    }
                  }
              }
  
@@ -1242,7 +1265,6 @@ public final class UCaseProps {
  
      /* case mapping properties API ---------------------------------------------- */
  
-    private static final int[] rootLocCache = { LOC_ROOT };
      /*
       * We need a StringBuilder for multi-code point output from the
       * full case mapping functions. However, we do not actually use that output,
@@ -1282,20 +1304,20 @@ public final class UCaseProps {
           */
          case UProperty.CHANGES_WHEN_LOWERCASED:
              dummyStringBuilder.setLength(0);
-            return toFullLower(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
+            return toFullLower(c, null, dummyStringBuilder, LOC_ROOT)>=0;
          case UProperty.CHANGES_WHEN_UPPERCASED:
              dummyStringBuilder.setLength(0);
-            return toFullUpper(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
+            return toFullUpper(c, null, dummyStringBuilder, LOC_ROOT)>=0;
          case UProperty.CHANGES_WHEN_TITLECASED:
              dummyStringBuilder.setLength(0);
-            return toFullTitle(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
+            return toFullTitle(c, null, dummyStringBuilder, LOC_ROOT)>=0;
          /* case UProperty.CHANGES_WHEN_CASEFOLDED: -- in UCharacterProperty.java */
          case UProperty.CHANGES_WHEN_CASEMAPPED:
              dummyStringBuilder.setLength(0);
              return
-                toFullLower(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0 ||
-                toFullUpper(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0 ||
-                toFullTitle(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
+                toFullLower(c, null, dummyStringBuilder, LOC_ROOT)>=0 ||
+                toFullUpper(c, null, dummyStringBuilder, LOC_ROOT)>=0 ||
+                toFullTitle(c, null, dummyStringBuilder, LOC_ROOT)>=0;
          default:
              return false;
          }
@@ -1303,7 +1325,7 @@ public final class UCaseProps {
  
      // data members -------------------------------------------------------- ***
      private int indexes[];
-    private char exceptions[];
+    private String exceptions;
      private char unfold[];
  
      private Trie2_16 trie;
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java b/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java

index 40fecc7b10b2258f083292cbb4f69cc598381945..c2385f84ff82682e812352d737d6b78381e50544 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java
@@ -29,6 +29,7 @@ import com.ibm.icu.impl.UPropertyAliases;
  import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
  import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection;
  import com.ibm.icu.text.BreakIterator;
+import com.ibm.icu.text.Edits;
  import com.ibm.icu.text.Normalizer2;
  import com.ibm.icu.util.RangeValueIterator;
  import com.ibm.icu.util.ULocale;
@@ -4960,29 +4961,37 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
       * @stable ICU 3.2
       */
      public static String toLowerCase(ULocale locale, String str) {
-        StringContextIterator iter = new StringContextIterator(str);
-        StringBuilder result = new StringBuilder(str.length());
-        int[] locCache = new int[1];
-        int c;
-
-        if (locale == null) {
-            locale = ULocale.getDefault();
+        // TODO: remove package path (com.ibm.icu.text.CaseMap is spelled out in full below)
+        if (str.length() <= 100) {  // short input: record edits and rebuild only if something changed
+            if (str.isEmpty()) {
+                return str;  // nothing to map
+            }
+            // Collect and apply only changes.
+            // Good if no or few changes.
+            // Bad (slow) if many changes.
+            Edits edits = new Edits();
+            StringBuilder replacementChars = com.ibm.icu.text.CaseMap.toLower(
+                    locale, com.ibm.icu.text.CaseMap.OMIT_UNCHANGED_TEXT, str,
+                    new StringBuilder(), edits);
+            return applyEdits(str, replacementChars, edits);
+        } else {  // longer input: map the whole text into a fresh buffer, without Edits
+            return com.ibm.icu.text.CaseMap.toLower(locale, 0, str, new StringBuilder(), null).toString();
          }
-        locCache[0]=0;
-
-        while((c=iter.nextCaseMapCP())>=0) {
-            c = UCaseProps.INSTANCE.toFullLower(c, iter, result, locale, locCache);
+    }
  
-            /* decode the result */
-            if(c<0) {
-                /* (not) original code point */
-                c=~c;
-            } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
-                /* mapping already appended to result */
-                continue;
-                /* } else { append single-code point mapping */
+    private static String applyEdits(String str, StringBuilder replacementChars, Edits edits) {
+        if (!edits.hasChanges()) {
+            return str;  // no changes recorded: hand back the original string object
+        }
+        StringBuilder result = new StringBuilder(str.length() + edits.lengthDelta());  // capacity: source length adjusted by the recorded delta
+        for (Edits.Iterator ei = edits.getCoarseIterator(); ei.next();) {
+            if (ei.hasChange()) {
+                int i = ei.replacementIndex();  // changed span: copy its text from replacementChars
+                result.append(replacementChars, i, i + ei.newLength());
+            } else {
+                int i = ei.sourceIndex();  // unchanged span: copy it from the source string
+                result.append(str, i, i + ei.oldLength());
              }
-            result.appendCodePoint(c);
          }
          return result.toString();
      }
@@ -5063,13 +5072,12 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
              int options) {
          StringContextIterator iter = new StringContextIterator(str);
          StringBuilder result = new StringBuilder(str.length());
-        int[] locCache = new int[1];
          int c, nc, srcLength = str.length();
  
          if (locale == null) {
              locale = ULocale.getDefault();
          }
-        locCache[0]=0;
+        int caseLocale = UCaseProps.getCaseLocale(locale);
  
          if(titleIter == null) {
              titleIter = BreakIterator.getWordInstance(locale);
@@ -5130,7 +5138,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
                  if(titleStart<index) {
                      FirstIJ = true;
                      /* titlecase c which is from titleStart */
-                    c = UCaseProps.INSTANCE.toFullTitle(c, iter, result, locale, locCache);
+                    c = UCaseProps.INSTANCE.toFullTitle(c, iter, result, caseLocale);
  
                      /* decode the result and lowercase up to index */
                      for(;;) {
@@ -5166,8 +5174,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection
                                  FirstIJ = false;
                              } else {
                                  /* Normal operation: Lowercase the rest of the word. */
-                                c = UCaseProps.INSTANCE.toFullLower(nc, iter, result, locale,
-                                        locCache);
+                                c = UCaseProps.INSTANCE.toFullLower(nc, iter, result, caseLocale);
                              }
                          } else {
                              break;
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/CaseMap.java b/icu4j/main/classes/core/src/com/ibm/icu/text/CaseMap.java

index 1bd6486ffd0268c6e5e363a8744897cbd503490d..6eb64d62ff6975299cc817f4dd5c964907fab5b0 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/CaseMap.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/CaseMap.java
@@ -2,10 +2,140 @@
  // License & terms of use: http://www.unicode.org/copyright.html#License
  package com.ibm.icu.text;
  
+import com.ibm.icu.impl.UCaseProps;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.util.ULocale;
+
+// TODO: issues/questions
+// - add java.util.Locale overloads when signatures are settled
+// - optimizing strategies for unstyled text: stop after number of changes or length of replacement?
+
  /**
+ * Low-level case mapping functions.
+ *
   * @draft ICU 59
   * @provisional This API might change or be removed in a future release.
   */
  public final class CaseMap {
+    /**
+     * Omit unchanged text when case-mapping with Edits.
+     *
+     * @draft ICU 59
+     * @provisional This API might change or be removed in a future release.
+     */
+    public static final int OMIT_UNCHANGED_TEXT = 0x4000;
+
+    /**
+     * Lowercases a string and optionally records edits.
+     * Casing is locale-dependent and context-sensitive.
+     * The result may be longer or shorter than the original.
+     *
+     * @param locale    The locale ID. If null, the default locale (ULocale.getDefault()) is used.
+     * @param options   Options bit set, usually 0. See {@link #OMIT_UNCHANGED_TEXT}.
+     * @param src       The original string.
+     * @param dest      A buffer for the result string. Must not be null.
+     * @param edits     Records edits for index mapping, working with styled text,
+     *                  and getting only changes (if any).
+     *                  This function calls edits.reset() first. edits can be null.
+     * @return dest with the result string (or only changes) appended.
+     *
+     * @see UCharacter#toLowerCase(ULocale, String)
+     * @draft ICU 59
+     * @provisional This API might change or be removed in a future release.
+     */
+     public static <A extends Appendable> A toLower(
+             ULocale locale, int options, CharSequence src, A dest, Edits edits) {
+         if (locale == null) {
+             locale = ULocale.getDefault();
+         }
+         int caseLocale = UCaseProps.getCaseLocale(locale);  // int case-locale code handed to the impl class
+         // TODO: remove package path
+         return com.ibm.icu.impl.CaseMap.toLower(caseLocale, options, src, dest, edits);
+     }
+
+    /**
+     * Uppercases a string and optionally records edits.
+     * Casing is locale-dependent and context-sensitive.
+     * The result may be longer or shorter than the original.
+     *
+     * @param locale    The locale ID.
+     * @param options   Options bit set, usually 0. See {@link #OMIT_UNCHANGED_TEXT}.
+     * @param src       The original string.
+     * @param dest      A buffer for the result string. Must not be null.
+     * @param edits     Records edits for index mapping, working with styled text,
+     *                  and getting only changes (if any).
+     *                  This function calls edits.reset() first. edits can be null.
+     * @return dest with the result string (or only changes) appended. TODO: stub, returns null for now.
+     *
+     * @see UCharacter#toUpperCase(ULocale, String)
+     * @draft ICU 59
+     * @provisional This API might change or be removed in a future release.
+     */
+     public static <A extends Appendable> A toUpper(
+             ULocale locale, int options, CharSequence src, A dest, Edits edits) {
+         return null;  // TODO: not implemented yet; the documented contract is to return dest
+     }
+
+    /**
+     * Titlecases a string and optionally records edits.
+     * Casing is locale-dependent and context-sensitive.
+     * The result may be longer or shorter than the original.
+     *
+     * Titlecasing uses a break iterator to find the first characters of words
+     * that are to be titlecased. It titlecases those characters and lowercases
+     * all others. (This can be modified with options bits.)
+     *
+     * @param locale    The locale ID.
+     * @param options   Options bit set, usually 0. See {@link #OMIT_UNCHANGED_TEXT},
+     *                  {@link UCharacter#TITLECASE_NO_LOWERCASE},
+     *                  {@link UCharacter#TITLECASE_NO_BREAK_ADJUSTMENT}.
+     * @param iter      A break iterator to find the first characters of words that are to be titlecased.
+     *                  It is set to the source string (setText())
+     *                  and used one or more times for iteration (first() and next()).
+     *                  If null, then a word break iterator for the locale is used
+     *                  (or something equivalent).
+     * @param src       The original string.
+     * @param dest      A buffer for the result string. Must not be null.
+     * @param edits     Records edits for index mapping, working with styled text,
+     *                  and getting only changes (if any).
+     *                  This function calls edits.reset() first. edits can be null.
+     * @return dest with the result string (or only changes) appended. TODO: stub, returns null for now.
+     *
+     * @see UCharacter#toTitleCase(ULocale, String, BreakIterator, int)
+     * @draft ICU 59
+     * @provisional This API might change or be removed in a future release.
+     */
+     public static <A extends Appendable> A toTitle(
+             ULocale locale, int options, BreakIterator iter,
+             CharSequence src, A dest, Edits edits) {
+         return null;  // TODO: not implemented yet; the documented contract is to return dest
+     }
  
+    /**
+     * Case-folds a string and optionally records edits.
+     *
+     * Case-folding is locale-independent and not context-sensitive,
+     * but there is an option for whether to include or exclude mappings for dotted I
+     * and dotless i that are marked with 'T' in CaseFolding.txt.
+     *
+     * The result may be longer or shorter than the original.
+     *
+     * @param options   Options bit set, usually 0. See {@link #OMIT_UNCHANGED_TEXT},
+     *                  {@link UCharacter#FOLD_CASE_DEFAULT},
+     *                  {@link UCharacter#FOLD_CASE_EXCLUDE_SPECIAL_I}.
+     * @param src       The original string.
+     * @param dest      A buffer for the result string. Must not be null.
+     * @param edits     Records edits for index mapping, working with styled text,
+     *                  and getting only changes (if any).
+     *                  This function calls edits.reset() first. edits can be null.
+     * @return dest with the result string (or only changes) appended. TODO: stub, returns null for now.
+     *
+     * @see UCharacter#foldCase(String, int)
+     * @draft ICU 59
+     * @provisional This API might change or be removed in a future release.
+     */
+     public static <A extends Appendable> A foldCase(
+             int options, CharSequence src, A dest, Edits edits) {
+         return null;  // TODO: not implemented yet; the documented contract is to return dest
+     }
  }
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/Edits.java b/icu4j/main/classes/core/src/com/ibm/icu/text/Edits.java

index 7d1ffd091cb74753b67571276c621776a08cc744..b1239527c14a0af439787d5b428e373b3fa77652 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/Edits.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/Edits.java
@@ -10,9 +10,6 @@ import java.util.Arrays;
   * Supports replacements, insertions, deletions in linear progression.
   * Does not support moving/reordering of text.
   *
- * An Edits object tracks a separate UErrorCode, but ICU string transformation functions
- * (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
- *
   * @draft ICU 59
   * @provisional This API might change or be removed in a future release.
   */
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java b/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java

index 38510c39899abd113868bc958f778e0d4fdd18d4..4443beb8d809710ae1412f5ae4cdaa4b2f2006ed 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java
@@ -3866,7 +3866,6 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
              int n = getRangeCount();
              int result;
              StringBuilder full = new StringBuilder();
-            int locCache[] = new int[1];
  
              for (int i=0; i<n; ++i) {
                  int start = getRangeStart(i);
@@ -3881,13 +3880,13 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
                      // add case mappings
                      // (does not add long s for regular s, or Kelvin for k, for example)
                      for (int cp=start; cp<=end; ++cp) {
-                        result = csp.toFullLower(cp, null, full, root, locCache);
+                        result = csp.toFullLower(cp, null, full, UCaseProps.LOC_ROOT);
                          addCaseMapping(foldSet, result, full);
  
-                        result = csp.toFullTitle(cp, null, full, root, locCache);
+                        result = csp.toFullTitle(cp, null, full, UCaseProps.LOC_ROOT);
                          addCaseMapping(foldSet, result, full);
  
-                        result = csp.toFullUpper(cp, null, full, root, locCache);
+                        result = csp.toFullUpper(cp, null, full, UCaseProps.LOC_ROOT);
                          addCaseMapping(foldSet, result, full);
  
                          result = csp.toFullFolding(cp, full, 0);
@@ -3906,6 +3905,7 @@ public class UnicodeSet extends UnicodeFilter implements Iterable<String>, Compa
                  } else {
                      BreakIterator bi = BreakIterator.getWordInstance(root);
                      for (String str : strings) {
+                        // TODO: call lower-level functions
                          foldSet.add(UCharacter.toLowerCase(root, str));
                          foldSet.add(UCharacter.toTitleCase(root, str, bi));
                          foldSet.add(UCharacter.toUpperCase(root, str));
diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/LowercaseTransliterator.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/LowercaseTransliterator.java

index 95bb60b32b3473db48b187b921225fc6a33729e4..dfed35266539316965b4252b161d4888d674e556 100644 (file)
--- a/icu4j/main/classes/translit/src/com/ibm/icu/text/LowercaseTransliterator.java
+++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/LowercaseTransliterator.java
@@ -44,7 +44,7 @@ class LowercaseTransliterator extends Transliterator{
      private final UCaseProps csp;
      private ReplaceableContextIterator iter;
      private StringBuilder result;
-    private int[] locCache;
+    private int caseLocale;
  
      /**
       * Constructs a transliterator.
@@ -56,8 +56,7 @@ class LowercaseTransliterator extends Transliterator{
          csp=UCaseProps.INSTANCE;
          iter=new ReplaceableContextIterator();
          result = new StringBuilder();
-        locCache = new int[1];
-        locCache[0]=0;
+        caseLocale = UCaseProps.getCaseLocale(locale);
      }
  
      /**
@@ -85,7 +84,7 @@ class LowercaseTransliterator extends Transliterator{
          iter.setLimit(offsets.limit);
          iter.setContextLimits(offsets.contextStart, offsets.contextLimit);
          while((c=iter.nextCaseMapCP())>=0) {
-            c=csp.toFullLower(c, iter, result, locale, locCache);
+            c=csp.toFullLower(c, iter, result, caseLocale);
  
              if(iter.didReachLimit() && isIncremental) {
                  // the case mapping function tried to look beyond the context limit
diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/TitlecaseTransliterator.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/TitlecaseTransliterator.java

index d3dc29681058c23b33802319aabaef4c27d83396..96f11c8e2931822c6f2271f107e06aac70b535a0 100644 (file)
--- a/icu4j/main/classes/translit/src/com/ibm/icu/text/TitlecaseTransliterator.java
+++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/TitlecaseTransliterator.java
@@ -42,7 +42,7 @@ class TitlecaseTransliterator extends Transliterator {
      private final UCaseProps csp;
      private ReplaceableContextIterator iter;
      private StringBuilder result;
-    private int[] locCache;
+    private int caseLocale;
  
     /**
       * Constructs a transliterator.
@@ -55,8 +55,7 @@ class TitlecaseTransliterator extends Transliterator {
          csp=UCaseProps.INSTANCE;
          iter=new ReplaceableContextIterator();
          result = new StringBuilder();
-        locCache = new int[1];
-        locCache[0]=0;
+        caseLocale = UCaseProps.getCaseLocale(locale);
      }
  
      /**
@@ -119,9 +118,9 @@ class TitlecaseTransliterator extends Transliterator {
              type=csp.getTypeOrIgnorable(c);
              if(type>=0) { // not case-ignorable
                  if(doTitle) {
-                    c=csp.toFullTitle(c, iter, result, locale, locCache);
+                    c=csp.toFullTitle(c, iter, result, caseLocale);
                  } else {
-                    c=csp.toFullLower(c, iter, result, locale, locCache);
+                    c=csp.toFullLower(c, iter, result, caseLocale);
                  }
                  doTitle = type==0; // doTitle=isUncased
  
diff --git a/icu4j/main/classes/translit/src/com/ibm/icu/text/UppercaseTransliterator.java b/icu4j/main/classes/translit/src/com/ibm/icu/text/UppercaseTransliterator.java

index 77e2dfd7073c2a61da101a0076b2d9087601b180..bd9e3fed38a955ec011ca31a1a0281f182474095 100644 (file)
--- a/icu4j/main/classes/translit/src/com/ibm/icu/text/UppercaseTransliterator.java
+++ b/icu4j/main/classes/translit/src/com/ibm/icu/text/UppercaseTransliterator.java
@@ -41,7 +41,7 @@ class UppercaseTransliterator extends Transliterator {
      private final UCaseProps csp;
      private ReplaceableContextIterator iter;
      private StringBuilder result;
-    private int[] locCache;
+    private int caseLocale;
  
      /**
       * Constructs a transliterator.
@@ -52,8 +52,7 @@ class UppercaseTransliterator extends Transliterator {
          csp=UCaseProps.INSTANCE;
          iter=new ReplaceableContextIterator();
          result = new StringBuilder();
-        locCache = new int[1];
-        locCache[0]=0;
+        caseLocale = UCaseProps.getCaseLocale(locale);
      }
  
      /**
@@ -81,7 +80,7 @@ class UppercaseTransliterator extends Transliterator {
          iter.setLimit(offsets.limit);
          iter.setContextLimits(offsets.contextStart, offsets.contextLimit);
          while((c=iter.nextCaseMapCP())>=0) {
-            c=csp.toFullUpper(c, iter, result, locale, locCache);
+            c=csp.toFullUpper(c, iter, result, caseLocale);
  
              if(iter.didReachLimit() && isIncremental) {
                  // the case mapping function tried to look beyond the context limit
author	Markus Scherer <markus.icu@gmail.com>
	Wed, 25 Jan 2017 23:52:55 +0000 (23:52 +0000)
committer	Markus Scherer <markus.icu@gmail.com>
	Wed, 25 Jan 2017 23:52:55 +0000 (23:52 +0000)
icu4c/source/common/ustrcase.cpp		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/CaseMap.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/impl/UCaseProps.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/text/CaseMap.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/text/Edits.java		patch \| blob \| history
icu4j/main/classes/core/src/com/ibm/icu/text/UnicodeSet.java		patch \| blob \| history
icu4j/main/classes/translit/src/com/ibm/icu/text/LowercaseTransliterator.java		patch \| blob \| history
icu4j/main/classes/translit/src/com/ibm/icu/text/TitlecaseTransliterator.java		patch \| blob \| history
icu4j/main/classes/translit/src/com/ibm/icu/text/UppercaseTransliterator.java		patch \| blob \| history