From: Markus Scherer Date: Mon, 6 Feb 2017 22:31:15 +0000 (+0000) Subject: ICU-12410 toUpper() with Edits X-Git-Tag: release-59-rc~145^2~10 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2798a5aa3e2bf28712e6a8d7072e967b4f49d4f0;p=icu ICU-12410 toUpper() with Edits X-SVN-Rev: 39646 --- diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/CaseMapImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/CaseMapImpl.java index a17bf786b37..6c86968f460 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/CaseMapImpl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/CaseMapImpl.java @@ -6,7 +6,6 @@ import java.io.IOException; import com.ibm.icu.text.Edits; import com.ibm.icu.util.ICUUncheckedIOException; -import com.ibm.icu.util.ULocale; public final class CaseMapImpl { /** @@ -210,25 +209,22 @@ public final class CaseMapImpl { } } - public static String toUpper(ULocale locale, String str) { + public static <A extends Appendable> A toUpper(int caseLocale, int options, + CharSequence src, A dest, Edits edits) { try { - int options = 0; Edits edits = null; // TODO - if (locale == null) { - locale = ULocale.getDefault(); + if (edits != null) { + edits.reset(); } - int caseLocale = UCaseProps.getCaseLocale(locale); if (caseLocale == UCaseProps.LOC_GREEK) { - return GreekUpper.toUpper(str); + return GreekUpper.toUpper(options, src, dest, edits); } - - StringContextIterator iter = new StringContextIterator(str); - StringBuilder result = new StringBuilder(str.length()); + StringContextIterator iter = new StringContextIterator(src); int c; - while((c=iter.nextCaseMapCP())>=0) { - c = UCaseProps.INSTANCE.toFullUpper(c, iter, result, caseLocale); - appendResult(c, result, iter.getCPLength(), options, edits); + while ((c = iter.nextCaseMapCP()) >= 0) { + c = UCaseProps.INSTANCE.toFullUpper(c, iter, dest, caseLocale); + appendResult(c, dest, iter.getCPLength(), options, edits); } - return result.toString(); + return dest; } catch (IOException e) { throw new 
ICUUncheckedIOException(e); } @@ -737,12 +733,11 @@ public final class CaseMapImpl { * <p>Keep this consistent with the C++ versions in ustrcase.cpp (UTF-16) and ucasemap.cpp (UTF-8). * @throws IOException */ - private static String toUpper(CharSequence s) throws IOException { - int options = 0; Edits edits = null; // TODO - StringBuilder result = new StringBuilder(s.length()); + private static <A extends Appendable> A toUpper(int options, + CharSequence src, A dest, Edits edits) throws IOException { int state = 0; - for (int i = 0; i < s.length();) { - int c = Character.codePointAt(s, i); + for (int i = 0; i < src.length();) { + int c = Character.codePointAt(src, i); int nextIndex = i + Character.charCount(c); int nextState = 0; int type = UCaseProps.INSTANCE.getTypeOrIgnorable(c); @@ -771,8 +766,8 @@ public final class CaseMapImpl { numYpogegrammeni = 1; } // Skip combining diacritics after this Greek letter. - while (nextIndex < s.length()) { - int diacriticData = getDiacriticData(s.charAt(nextIndex)); + while (nextIndex < src.length()) { + int diacriticData = getDiacriticData(src.charAt(nextIndex)); if (diacriticData != 0) { data |= diacriticData; if ((diacriticData & HAS_YPOGEGRAMMENI) != 0) { @@ -792,7 +787,7 @@ public final class CaseMapImpl { (data & HAS_ACCENT) != 0 && numYpogegrammeni == 0 && (state & AFTER_CASED) == 0 && - !isFollowedByCasedLetter(s, nextIndex)) { + !isFollowedByCasedLetter(src, nextIndex)) { // Keep disjunctive "or" with (only) a tonos. // We use the same "word boundary" conditions as for the Final_Sigma test. if (i == nextIndex) { @@ -810,25 +805,59 @@ public final class CaseMapImpl { data &= ~HAS_EITHER_DIALYTIKA; } } - result.appendCodePoint(upper); - if ((data & HAS_EITHER_DIALYTIKA) != 0) { - result.append('\u0308'); // restore or add a dialytika - } - if (addTonos) { - result.append('\u0301'); + + boolean change; + if (edits == null) { + change = true; // common, simple usage + } else { + // Find out first whether we are changing the text. 
+ change = src.charAt(i) != upper || numYpogegrammeni > 0; + int i2 = i + 1; + if ((data & HAS_EITHER_DIALYTIKA) != 0) { + change |= i2 >= nextIndex || src.charAt(i2) != 0x308; + ++i2; + } + if (addTonos) { + change |= i2 >= nextIndex || src.charAt(i2) != 0x301; + ++i2; + } + int oldLength = nextIndex - i; + int newLength = (i2 - i) + numYpogegrammeni; + change |= oldLength != newLength; + if (change) { + if (edits != null) { + edits.addReplace(oldLength, newLength); + } + } else { + if (edits != null) { + edits.addUnchanged(oldLength); + } + // Write unchanged text? + change = (options & OMIT_UNCHANGED_TEXT) == 0; + } } - while (numYpogegrammeni > 0) { - result.append('Ι'); - --numYpogegrammeni; + + if (change) { + dest.append((char)upper); + if ((data & HAS_EITHER_DIALYTIKA) != 0) { + dest.append('\u0308'); // restore or add a dialytika + } + if (addTonos) { + dest.append('\u0301'); + } + while (numYpogegrammeni > 0) { + dest.append('Ι'); + --numYpogegrammeni; + } } } else { - c = UCaseProps.INSTANCE.toFullUpper(c, null, result, UCaseProps.LOC_GREEK); - appendResult(c, result, nextIndex - i, options, edits); + c = UCaseProps.INSTANCE.toFullUpper(c, null, dest, UCaseProps.LOC_GREEK); + appendResult(c, dest, nextIndex - i, options, edits); } i = nextIndex; state = nextState; } - return result.toString(); + return dest; } } } diff --git a/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java b/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java index 88a01d70be7..5ebde55596c 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java @@ -29,7 +29,6 @@ import com.ibm.icu.impl.UPropertyAliases; import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory; import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection; import com.ibm.icu.text.BreakIterator; -import com.ibm.icu.text.CaseMap; import com.ibm.icu.text.Edits; import com.ibm.icu.text.Normalizer2; import 
com.ibm.icu.util.RangeValueIterator; @@ -4877,7 +4876,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection */ public static String toUpperCase(String str) { - return toUpperCase(ULocale.getDefault(), str); + return toUpperCase(getDefaultCaseLocale(), str); } /** @@ -4889,7 +4888,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection */ public static String toLowerCase(String str) { - return toLowerCase(ULocale.getDefault(), str); + return toLowerCase(getDefaultCaseLocale(), str); } /** @@ -4912,7 +4911,74 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection */ public static String toTitleCase(String str, BreakIterator breakiter) { - return toTitleCase(ULocale.getDefault(), str, breakiter); + return toTitleCase(Locale.getDefault(), str, breakiter, 0); + } + + private static int getDefaultCaseLocale() { + return UCaseProps.getCaseLocale(Locale.getDefault()); + } + + private static int getCaseLocale(Locale locale) { + if (locale == null) { + locale = Locale.getDefault(); + } + return UCaseProps.getCaseLocale(locale); + } + + private static int getCaseLocale(ULocale locale) { + if (locale == null) { + locale = ULocale.getDefault(); + } + return UCaseProps.getCaseLocale(locale); + } + + private static String toLowerCase(int caseLocale, String str) { + if (str.length() <= 100) { + if (str.isEmpty()) { + return str; + } + // Collect and apply only changes. + // Good if no or few changes. Bad (slow) if many changes. 
+ Edits edits = new Edits(); + StringBuilder replacementChars = CaseMapImpl.toLower( + caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits); + return applyEdits(str, replacementChars, edits); + } else { + return CaseMapImpl.toLower(caseLocale, 0, str, new StringBuilder(), null).toString(); + } + } + + private static String toUpperCase(int caseLocale, String str) { + if (str.length() <= 100) { + if (str.isEmpty()) { + return str; + } + // Collect and apply only changes. + // Good if no or few changes. Bad (slow) if many changes. + Edits edits = new Edits(); + StringBuilder replacementChars = CaseMapImpl.toUpper( + caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits); + return applyEdits(str, replacementChars, edits); + } else { + return CaseMapImpl.toUpper(caseLocale, 0, str, new StringBuilder(), null).toString(); + } + } + + private static String applyEdits(String str, StringBuilder replacementChars, Edits edits) { + if (!edits.hasChanges()) { + return str; + } + StringBuilder result = new StringBuilder(str.length() + edits.lengthDelta()); + for (Edits.Iterator ei = edits.getCoarseIterator(); ei.next();) { + if (ei.hasChange()) { + int i = ei.replacementIndex(); + result.append(replacementChars, i, i + ei.newLength()); + } else { + int i = ei.sourceIndex(); + result.append(str, i, i + ei.oldLength()); + } + } + return result.toString(); } /** @@ -4925,7 +4991,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection */ public static String toUpperCase(Locale locale, String str) { - return toUpperCase(ULocale.forLocale(locale), str); + return toUpperCase(getCaseLocale(locale), str); } /** @@ -4937,7 +5003,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection * @stable ICU 3.2 */ public static String toUpperCase(ULocale locale, String str) { - return CaseMapImpl.toUpper(locale, str); + return toUpperCase(getCaseLocale(locale), str); } /** @@ -4950,20 +5016,7 
@@ public final class UCharacter implements ECharacterCategory, ECharacterDirection */ public static String toLowerCase(Locale locale, String str) { - if (str.length() <= 100) { - if (str.isEmpty()) { - return str; - } - // Collect and apply only changes. - // Good if no or few changes. - // Bad (slow) if many changes. - Edits edits = new Edits(); - StringBuilder replacementChars = CaseMap.toLower().omitUnchangedText().apply( - locale, str, new StringBuilder(), edits); - return applyEdits(str, replacementChars, edits); - } else { - return CaseMap.toLower().apply(locale, str, new StringBuilder(), null).toString(); - } + return toLowerCase(getCaseLocale(locale), str); } /** @@ -4975,24 +5028,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection * @stable ICU 3.2 */ public static String toLowerCase(ULocale locale, String str) { - return toLowerCase(locale.toLocale(), str); - } - - private static String applyEdits(String str, StringBuilder replacementChars, Edits edits) { - if (!edits.hasChanges()) { - return str; - } - StringBuilder result = new StringBuilder(str.length() + edits.lengthDelta()); - for (Edits.Iterator ei = edits.getCoarseIterator(); ei.next();) { - if (ei.hasChange()) { - int i = ei.replacementIndex(); - result.append(replacementChars, i, i + ei.newLength()); - } else { - int i = ei.sourceIndex(); - result.append(str, i, i + ei.oldLength()); - } - } - return result.toString(); + return toLowerCase(getCaseLocale(locale), str); } /** diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/CaseMap.java b/icu4j/main/classes/core/src/com/ibm/icu/text/CaseMap.java index 07fd011d5cf..e1747b635c4 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/CaseMap.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/CaseMap.java @@ -31,6 +31,13 @@ public abstract class CaseMap { private CaseMap(int opt) { internalOptions = opt; } + private static int getCaseLocale(Locale locale) { + if (locale == null) { + locale = 
Locale.getDefault(); + } + return UCaseProps.getCaseLocale(locale); + } + /** * @return Lowercasing object with default options. * @draft ICU 59 @@ -108,11 +115,7 @@ public abstract class CaseMap { */ public <A extends Appendable> A apply( Locale locale, CharSequence src, A dest, Edits edits) { - if (locale == null) { - locale = Locale.getDefault(); - } - int caseLocale = UCaseProps.getCaseLocale(locale); - return CaseMapImpl.toLower(caseLocale, internalOptions, src, dest, edits); + return CaseMapImpl.toLower(getCaseLocale(locale), internalOptions, src, dest, edits); } } @@ -158,7 +161,7 @@ public abstract class CaseMap { */ public <A extends Appendable> A apply( Locale locale, CharSequence src, A dest, Edits edits) { - return null; + return CaseMapImpl.toUpper(getCaseLocale(locale), internalOptions, src, dest, edits); } }