From: Markus Scherer Date: Thu, 14 Sep 2017 22:26:13 +0000 (+0000) Subject: ICU-13247 Java: String CaseMap.apply(CharSequence); fix omitUnchangedText() without... X-Git-Tag: release-60-rc~139 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ee7fb909bf0a8595a1a6c58f5787d55c566b6016;p=icu ICU-13247 Java: String CaseMap.apply(CharSequence); fix omitUnchangedText() without Edits X-SVN-Rev: 40417 --- diff --git a/icu4c/source/common/ucasemap.cpp b/icu4c/source/common/ucasemap.cpp index 6e550aadcd8..7ad4c315f8e 100644 --- a/icu4c/source/common/ucasemap.cpp +++ b/icu4c/source/common/ucasemap.cpp @@ -165,9 +165,9 @@ appendResult(uint8_t *dest, int32_t destIndex, int32_t destCapacity, /* (not) original code point */ if(edits!=NULL) { edits->addUnchanged(cpLength); - if(options & U_OMIT_UNCHANGED_TEXT) { - return destIndex; - } + } + if(options & U_OMIT_UNCHANGED_TEXT) { + return destIndex; } c=~result; if(destIndex0) { if(edits!=NULL) { edits->addUnchanged(length); - if(options & U_OMIT_UNCHANGED_TEXT) { - return destIndex; - } + } + if(options & U_OMIT_UNCHANGED_TEXT) { + return destIndex; } if(length>(INT32_MAX-destIndex)) { return -1; // integer overflow @@ -628,8 +628,10 @@ int32_t toUpper(uint32_t options, } } - UBool change = TRUE; - if (edits != NULL) { + UBool change; + if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) { + change = TRUE; // common, simple usage + } else { // Find out first whether we are changing the text. 
U_ASSERT(0x370 <= upper && upper <= 0x3ff); // 2-byte UTF-8, main Greek block change = (i + 2) > nextIndex || diff --git a/icu4c/source/common/ustrcase.cpp b/icu4c/source/common/ustrcase.cpp index a96c57cef2e..b1beb342778 100644 --- a/icu4c/source/common/ustrcase.cpp +++ b/icu4c/source/common/ustrcase.cpp @@ -73,9 +73,9 @@ appendResult(UChar *dest, int32_t destIndex, int32_t destCapacity, /* (not) original code point */ if(edits!=NULL) { edits->addUnchanged(cpLength); - if(options & U_OMIT_UNCHANGED_TEXT) { - return destIndex; - } + } + if(options & U_OMIT_UNCHANGED_TEXT) { + return destIndex; } c=~result; if(destIndex0) { if(edits!=NULL) { edits->addUnchanged(length); - if(options & U_OMIT_UNCHANGED_TEXT) { - return destIndex; - } + } + if(options & U_OMIT_UNCHANGED_TEXT) { + return destIndex; } if(length>(INT32_MAX-destIndex)) { return -1; // integer overflow @@ -934,8 +934,10 @@ int32_t toUpper(uint32_t options, } } - UBool change = TRUE; - if (edits != NULL) { + UBool change; + if (edits == nullptr && (options & U_OMIT_UNCHANGED_TEXT) == 0) { + change = TRUE; // common, simple usage + } else { // Find out first whether we are changing the text. 
change = src[i] != upper || numYpogegrammeni > 0; int32_t i2 = i + 1; diff --git a/icu4c/source/test/intltest/strcase.cpp b/icu4c/source/test/intltest/strcase.cpp index 35513052461..8f7a57d2b41 100644 --- a/icu4c/source/test/intltest/strcase.cpp +++ b/icu4c/source/test/intltest/strcase.cpp @@ -62,6 +62,8 @@ public: void TestMergeEdits(); void TestCaseMapWithEdits(); void TestCaseMapUTF8WithEdits(); + void TestCaseMapToString(); + void TestCaseMapUTF8ToString(); void TestLongUnicodeString(); void TestBug13127(); void TestInPlaceTitle(); @@ -102,6 +104,8 @@ StringCaseTest::runIndexedTest(int32_t index, UBool exec, const char *&name, cha TESTCASE_AUTO(TestMergeEdits); TESTCASE_AUTO(TestCaseMapWithEdits); TESTCASE_AUTO(TestCaseMapUTF8WithEdits); + TESTCASE_AUTO(TestCaseMapToString); + TESTCASE_AUTO(TestCaseMapUTF8ToString); TESTCASE_AUTO(TestLongUnicodeString); #if !UCONFIG_NO_BREAK_ITERATION TESTCASE_AUTO(TestBug13127); @@ -1216,7 +1220,7 @@ void StringCaseTest::TestMergeEdits() { } void StringCaseTest::TestCaseMapWithEdits() { - IcuTestErrorCode errorCode(*this, "TestEdits"); + IcuTestErrorCode errorCode(*this, "TestCaseMapWithEdits"); UChar dest[20]; Edits edits; @@ -1258,7 +1262,7 @@ void StringCaseTest::TestCaseMapWithEdits() { U_OMIT_UNCHANGED_TEXT | U_TITLECASE_NO_BREAK_ADJUSTMENT | U_TITLECASE_NO_LOWERCASE, - NULL, u"IjssEL IglOo", 12, + nullptr, u"IjssEL IglOo", 12, dest, UPRV_LENGTHOF(dest), &edits, errorCode); assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"), UnicodeString(TRUE, dest, length)); static const EditChange titleExpectedChanges[] = { @@ -1338,7 +1342,7 @@ void StringCaseTest::TestCaseMapUTF8WithEdits() { U_OMIT_UNCHANGED_TEXT | U_TITLECASE_NO_BREAK_ADJUSTMENT | U_TITLECASE_NO_LOWERCASE, - NULL, u8"IjssEL IglOo", 12, + nullptr, u8"IjssEL IglOo", 12, dest, UPRV_LENGTHOF(dest), &edits, errorCode); assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"), UnicodeString::fromUTF8(StringPiece(dest, length))); @@ -1377,6 +1381,114 @@ void 
StringCaseTest::TestCaseMapUTF8WithEdits() { TRUE, errorCode); } +void StringCaseTest::TestCaseMapToString() { + // This test function name is parallel with one in UCharacterCaseTest.java. + // It is a bit of a misnomer until we have CaseMap API that writes to + // a UnicodeString, at which point we should change this code here. + IcuTestErrorCode errorCode(*this, "TestCaseMapToString"); + UChar dest[20]; + + // Omit unchanged text. + int32_t length = CaseMap::toLower("tr", U_OMIT_UNCHANGED_TEXT, + u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); + assertEquals(u"toLower(IstanBul)", + UnicodeString(u"ıb"), UnicodeString(TRUE, dest, length)); + length = CaseMap::toUpper("el", U_OMIT_UNCHANGED_TEXT, + u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); + assertEquals(u"toUpper(Πατάτα)", + UnicodeString(u"ΑΤΑΤΑ"), UnicodeString(TRUE, dest, length)); +#if !UCONFIG_NO_BREAK_ITERATION + length = CaseMap::toTitle("nl", + U_OMIT_UNCHANGED_TEXT | + U_TITLECASE_NO_BREAK_ADJUSTMENT | + U_TITLECASE_NO_LOWERCASE, + nullptr, u"IjssEL IglOo", 12, + dest, UPRV_LENGTHOF(dest), nullptr, errorCode); + assertEquals(u"toTitle(IjssEL IglOo)", + UnicodeString(u"J"), UnicodeString(TRUE, dest, length)); +#endif + length = CaseMap::fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I, + u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); + assertEquals(u"foldCase(IßtanBul)", + UnicodeString(u"ıssb"), UnicodeString(TRUE, dest, length)); + + // Return the whole result string. 
+ length = CaseMap::toLower("tr", 0, + u"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); + assertEquals(u"toLower(IstanBul)", + UnicodeString(u"ıstanbul"), UnicodeString(TRUE, dest, length)); + length = CaseMap::toUpper("el", 0, + u"Πατάτα", 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); + assertEquals(u"toUpper(Πατάτα)", + UnicodeString(u"ΠΑΤΑΤΑ"), UnicodeString(TRUE, dest, length)); +#if !UCONFIG_NO_BREAK_ITERATION + length = CaseMap::toTitle("nl", + U_TITLECASE_NO_BREAK_ADJUSTMENT | + U_TITLECASE_NO_LOWERCASE, + nullptr, u"IjssEL IglOo", 12, + dest, UPRV_LENGTHOF(dest), nullptr, errorCode); + assertEquals(u"toTitle(IjssEL IglOo)", + UnicodeString(u"IJssEL IglOo"), UnicodeString(TRUE, dest, length)); +#endif + length = CaseMap::fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I, + u"IßtanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); + assertEquals(u"foldCase(IßtanBul)", + UnicodeString(u"ısstanbul"), UnicodeString(TRUE, dest, length)); +} + +void StringCaseTest::TestCaseMapUTF8ToString() { + IcuTestErrorCode errorCode(*this, "TestCaseMapUTF8ToString"); + // TODO: Change this to writing to string via ByteSink when that is available. + char dest[50]; + + // Omit unchanged text. 
+ int32_t length = CaseMap::utf8ToLower("tr", U_OMIT_UNCHANGED_TEXT, + u8"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); + assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıb"), + UnicodeString::fromUTF8(StringPiece(dest, length))); + length = CaseMap::utf8ToUpper("el", U_OMIT_UNCHANGED_TEXT, + u8"Πατάτα", 6 * 2, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); + assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΑΤΑΤΑ"), + UnicodeString::fromUTF8(StringPiece(dest, length))); +#if !UCONFIG_NO_BREAK_ITERATION + length = CaseMap::utf8ToTitle("nl", + U_OMIT_UNCHANGED_TEXT | + U_TITLECASE_NO_BREAK_ADJUSTMENT | + U_TITLECASE_NO_LOWERCASE, + nullptr, u8"IjssEL IglOo", 12, + dest, UPRV_LENGTHOF(dest), nullptr, errorCode); + assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"J"), + UnicodeString::fromUTF8(StringPiece(dest, length))); +#endif + length = CaseMap::utf8Fold(U_OMIT_UNCHANGED_TEXT | U_FOLD_CASE_EXCLUDE_SPECIAL_I, + u8"IßtanBul", 1 + 2 + 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); + assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ıssb"), + UnicodeString::fromUTF8(StringPiece(dest, length))); + + // Return the whole result string. 
+ length = CaseMap::utf8ToLower("tr", 0, + u8"IstanBul", 8, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); + assertEquals(u"toLower(IstanBul)", UnicodeString(u"ıstanbul"), + UnicodeString::fromUTF8(StringPiece(dest, length))); + length = CaseMap::utf8ToUpper("el", 0, + u8"Πατάτα", 6 * 2, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); + assertEquals(u"toUpper(Πατάτα)", UnicodeString(u"ΠΑΤΑΤΑ"), + UnicodeString::fromUTF8(StringPiece(dest, length))); +#if !UCONFIG_NO_BREAK_ITERATION + length = CaseMap::utf8ToTitle("nl", + U_TITLECASE_NO_BREAK_ADJUSTMENT | + U_TITLECASE_NO_LOWERCASE, + nullptr, u8"IjssEL IglOo", 12, + dest, UPRV_LENGTHOF(dest), nullptr, errorCode); + assertEquals(u"toTitle(IjssEL IglOo)", UnicodeString(u"IJssEL IglOo"), + UnicodeString::fromUTF8(StringPiece(dest, length))); +#endif + length = CaseMap::utf8Fold(U_FOLD_CASE_EXCLUDE_SPECIAL_I, + u8"IßtanBul", 1 + 2 + 6, dest, UPRV_LENGTHOF(dest), nullptr, errorCode); + assertEquals(u"foldCase(IßtanBul)", UnicodeString(u"ısstanbul"), + UnicodeString::fromUTF8(StringPiece(dest, length))); +} + void StringCaseTest::TestLongUnicodeString() { // Code coverage for UnicodeString case mapping code handling // long strings or many changes in a string. 
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/impl/CaseMapImpl.java b/icu4j/main/classes/core/src/com/ibm/icu/impl/CaseMapImpl.java index 4963bb7f9e8..f6ab77dbb81 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/impl/CaseMapImpl.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/impl/CaseMapImpl.java @@ -318,6 +318,11 @@ public final class CaseMapImpl { length = newText.getEndIndex(); } + @Override + public void setText(CharSequence newText) { + length = newText.length(); + } + @Override public void setText(String newText) { length = newText.length(); @@ -346,9 +351,9 @@ public final class CaseMapImpl { // (not) original code point if (edits != null) { edits.addUnchanged(cpLength); - if ((options & OMIT_UNCHANGED_TEXT) != 0) { - return; - } + } + if ((options & OMIT_UNCHANGED_TEXT) != 0) { + return; } appendCodePoint(dest, ~result); } else if (result <= UCaseProps.MAX_STRING_LENGTH) { @@ -370,14 +375,31 @@ public final class CaseMapImpl { if (length > 0) { if (edits != null) { edits.addUnchanged(length); - if ((options & OMIT_UNCHANGED_TEXT) != 0) { - return; - } + } + if ((options & OMIT_UNCHANGED_TEXT) != 0) { + return; } dest.append(src, start, start + length); } } + private static String applyEdits(CharSequence src, StringBuilder replacementChars, Edits edits) { + if (!edits.hasChanges()) { + return src.toString(); + } + StringBuilder result = new StringBuilder(src.length() + edits.lengthDelta()); + for (Edits.Iterator ei = edits.getCoarseIterator(); ei.next();) { + if (ei.hasChange()) { + int i = ei.replacementIndex(); + result.append(replacementChars, i, i + ei.newLength()); + } else { + int i = ei.sourceIndex(); + result.append(src, i, i + ei.oldLength()); + } + } + return result.toString(); + } + private static void internalToLower(int caseLocale, int options, StringContextIterator iter, Appendable dest, Edits edits) throws IOException { int c; @@ -387,6 +409,23 @@ public final class CaseMapImpl { } } + public static String toLower(int 
caseLocale, int options, CharSequence src) { + if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) { + if (src.length() == 0) { + return src.toString(); + } + // Collect and apply only changes. + // Good if no or few changes. Bad (slow) if many changes. + Edits edits = new Edits(); + StringBuilder replacementChars = toLower( + caseLocale, options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits); + return applyEdits(src, replacementChars, edits); + } else { + return toLower(caseLocale, options, src, + new StringBuilder(src.length()), null).toString(); + } + } + public static A toLower(int caseLocale, int options, CharSequence src, A dest, Edits edits) { try { @@ -401,6 +440,23 @@ public final class CaseMapImpl { } } + public static String toUpper(int caseLocale, int options, CharSequence src) { + if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) { + if (src.length() == 0) { + return src.toString(); + } + // Collect and apply only changes. + // Good if no or few changes. Bad (slow) if many changes. + Edits edits = new Edits(); + StringBuilder replacementChars = toUpper( + caseLocale, options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits); + return applyEdits(src, replacementChars, edits); + } else { + return toUpper(caseLocale, options, src, + new StringBuilder(src.length()), null).toString(); + } + } + public static A toUpper(int caseLocale, int options, CharSequence src, A dest, Edits edits) { try { @@ -422,6 +478,24 @@ public final class CaseMapImpl { } } + public static String toTitle(int caseLocale, int options, BreakIterator iter, CharSequence src) { + if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) { + if (src.length() == 0) { + return src.toString(); + } + // Collect and apply only changes. + // Good if no or few changes. Bad (slow) if many changes. 
+ Edits edits = new Edits(); + StringBuilder replacementChars = toTitle( + caseLocale, options | OMIT_UNCHANGED_TEXT, iter, src, + new StringBuilder(), edits); + return applyEdits(src, replacementChars, edits); + } else { + return toTitle(caseLocale, options, iter, src, + new StringBuilder(src.length()), null).toString(); + } + } + public static A toTitle( int caseLocale, int options, BreakIterator titleIter, CharSequence src, A dest, Edits edits) { @@ -533,6 +607,22 @@ public final class CaseMapImpl { } } + public static String fold(int options, CharSequence src) { + if (src.length() <= 100 && (options & OMIT_UNCHANGED_TEXT) == 0) { + if (src.length() == 0) { + return src.toString(); + } + // Collect and apply only changes. + // Good if no or few changes. Bad (slow) if many changes. + Edits edits = new Edits(); + StringBuilder replacementChars = fold( + options | OMIT_UNCHANGED_TEXT, src, new StringBuilder(), edits); + return applyEdits(src, replacementChars, edits); + } else { + return fold(options, src, new StringBuilder(src.length()), null).toString(); + } + } + public static A fold(int options, CharSequence src, A dest, Edits edits) { try { @@ -1131,7 +1221,7 @@ public final class CaseMapImpl { } boolean change; - if (edits == null) { + if (edits == null && (options & OMIT_UNCHANGED_TEXT) == 0) { change = true; // common, simple usage } else { // Find out first whether we are changing the text. 
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java b/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java index fc970dc790b..af2f2d1c934 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/lang/UCharacter.java @@ -28,7 +28,6 @@ import com.ibm.icu.impl.UPropertyAliases; import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory; import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection; import com.ibm.icu.text.BreakIterator; -import com.ibm.icu.text.Edits; import com.ibm.icu.text.Normalizer2; import com.ibm.icu.util.RangeValueIterator; import com.ibm.icu.util.ULocale; @@ -4937,7 +4936,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection */ public static String toUpperCase(String str) { - return toUpperCase(getDefaultCaseLocale(), str); + return CaseMapImpl.toUpper(getDefaultCaseLocale(), 0, str); } /** @@ -4949,7 +4948,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection */ public static String toLowerCase(String str) { - return toLowerCase(getDefaultCaseLocale(), str); + return CaseMapImpl.toLower(getDefaultCaseLocale(), 0, str); } /** @@ -4993,75 +4992,6 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection return UCaseProps.getCaseLocale(locale); } - private static String toLowerCase(int caseLocale, String str) { - if (str.length() <= 100) { - if (str.isEmpty()) { - return str; - } - // Collect and apply only changes. - // Good if no or few changes. Bad (slow) if many changes. 
- Edits edits = new Edits(); - StringBuilder replacementChars = CaseMapImpl.toLower( - caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits); - return applyEdits(str, replacementChars, edits); - } else { - return CaseMapImpl.toLower(caseLocale, 0, str, - new StringBuilder(str.length()), null).toString(); - } - } - - private static String toUpperCase(int caseLocale, String str) { - if (str.length() <= 100) { - if (str.isEmpty()) { - return str; - } - // Collect and apply only changes. - // Good if no or few changes. Bad (slow) if many changes. - Edits edits = new Edits(); - StringBuilder replacementChars = CaseMapImpl.toUpper( - caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits); - return applyEdits(str, replacementChars, edits); - } else { - return CaseMapImpl.toUpper(caseLocale, 0, str, - new StringBuilder(str.length()), null).toString(); - } - } - - private static String toTitleCase(int caseLocale, int options, BreakIterator titleIter, String str) { - if (str.length() <= 100) { - if (str.isEmpty()) { - return str; - } - // Collect and apply only changes. - // Good if no or few changes. Bad (slow) if many changes. 
- Edits edits = new Edits(); - StringBuilder replacementChars = CaseMapImpl.toTitle( - caseLocale, options | CaseMapImpl.OMIT_UNCHANGED_TEXT, titleIter, str, - new StringBuilder(), edits); - return applyEdits(str, replacementChars, edits); - } else { - return CaseMapImpl.toTitle(caseLocale, options, titleIter, str, - new StringBuilder(str.length()), null).toString(); - } - } - - private static String applyEdits(String str, StringBuilder replacementChars, Edits edits) { - if (!edits.hasChanges()) { - return str; - } - StringBuilder result = new StringBuilder(str.length() + edits.lengthDelta()); - for (Edits.Iterator ei = edits.getCoarseIterator(); ei.next();) { - if (ei.hasChange()) { - int i = ei.replacementIndex(); - result.append(replacementChars, i, i + ei.newLength()); - } else { - int i = ei.sourceIndex(); - result.append(str, i, i + ei.oldLength()); - } - } - return result.toString(); - } - /** * Returns the uppercase version of the argument string. * Casing is dependent on the argument locale and context-sensitive. 
@@ -5072,7 +5002,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection */ public static String toUpperCase(Locale locale, String str) { - return toUpperCase(getCaseLocale(locale), str); + return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str); } /** @@ -5084,7 +5014,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection * @stable ICU 3.2 */ public static String toUpperCase(ULocale locale, String str) { - return toUpperCase(getCaseLocale(locale), str); + return CaseMapImpl.toUpper(getCaseLocale(locale), 0, str); } /** @@ -5097,7 +5027,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection */ public static String toLowerCase(Locale locale, String str) { - return toLowerCase(getCaseLocale(locale), str); + return CaseMapImpl.toLower(getCaseLocale(locale), 0, str); } /** @@ -5109,7 +5039,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection * @stable ICU 3.2 */ public static String toLowerCase(ULocale locale, String str) { - return toLowerCase(getCaseLocale(locale), str); + return CaseMapImpl.toLower(getCaseLocale(locale), 0, str); } /** @@ -5190,7 +5120,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection } titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter); titleIter.setText(str); - return toTitleCase(getCaseLocale(locale), options, titleIter, str); + return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str); } /** @@ -5217,16 +5147,13 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection */ @Deprecated public static String toTitleFirst(ULocale locale, String str) { - return toTitleCase(locale, str, null, - CaseMapImpl.TITLECASE_WHOLE_STRING|TITLECASE_NO_LOWERCASE); - // TODO: Remove this function. - // Move something like the following helper function into CLDR. 
- // private static final CaseMap.Title TO_TITLE_WHOLE_STRING_NO_LOWERCASE = - // CaseMap.toTitle().wholeString().noLowercase(); - // return TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply( - // locale.toLocale(), null, str, new StringBuilder(), null).toString(); + // TODO: Remove this function. Inline it where it is called in CLDR. + return TO_TITLE_WHOLE_STRING_NO_LOWERCASE.apply(locale.toLocale(), null, str); } + private static final com.ibm.icu.text.CaseMap.Title TO_TITLE_WHOLE_STRING_NO_LOWERCASE = + com.ibm.icu.text.CaseMap.toTitle().wholeString().noLowercase(); + /** * {@icu}

<p>Returns the titlecase version of the argument string. *

Position for titlecasing is determined by the argument break @@ -5257,7 +5184,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection } titleIter = CaseMapImpl.getTitleBreakIterator(locale, options, titleIter); titleIter.setText(str); - return toTitleCase(getCaseLocale(locale), options, titleIter, str); + return CaseMapImpl.toTitle(getCaseLocale(locale), options, titleIter, str); } /** @@ -5374,19 +5301,7 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection * @stable ICU 2.6 */ public static final String foldCase(String str, int options) { - if (str.length() <= 100) { - if (str.isEmpty()) { - return str; - } - // Collect and apply only changes. - // Good if no or few changes. Bad (slow) if many changes. - Edits edits = new Edits(); - StringBuilder replacementChars = CaseMapImpl.fold( - options | CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits); - return applyEdits(str, replacementChars, edits); - } else { - return CaseMapImpl.fold(options, str, new StringBuilder(str.length()), null).toString(); - } + return CaseMapImpl.fold(options, str); } /** diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/CaseMap.java b/icu4j/main/classes/core/src/com/ibm/icu/text/CaseMap.java index 2f05a967f25..67a13a8e390 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/CaseMap.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/CaseMap.java @@ -92,6 +92,24 @@ public abstract class CaseMap { return OMIT_UNCHANGED; } + /** + * Lowercases a string. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * + * @param locale The locale ID. Can be null for {@link Locale#getDefault}. + * (See {@link ULocale#toLocale}.) + * @param src The original string. + * @return the result string. + * + * @see UCharacter#toLowerCase(Locale, String) + * @draft ICU 60 + * @provisional This API might change or be removed in a future release. 
+ */ + public String apply(Locale locale, CharSequence src) { + return CaseMapImpl.toLower(getCaseLocale(locale), internalOptions, src); + } + /** * Lowercases a string and optionally records edits (see {@link #omitUnchangedText}). * Casing is locale-dependent and context-sensitive. @@ -138,6 +156,24 @@ public abstract class CaseMap { return OMIT_UNCHANGED; } + /** + * Uppercases a string. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * + * @param locale The locale ID. Can be null for {@link Locale#getDefault}. + * (See {@link ULocale#toLocale}.) + * @param src The original string. + * @return the result string. + * + * @see UCharacter#toUpperCase(Locale, String) + * @draft ICU 60 + * @provisional This API might change or be removed in a future release. + */ + public String apply(Locale locale, CharSequence src) { + return CaseMapImpl.toUpper(getCaseLocale(locale), internalOptions, src); + } + /** * Uppercases a string and optionally records edits (see {@link #omitUnchangedText}). * Casing is locale-dependent and context-sensitive. @@ -288,6 +324,38 @@ public abstract class CaseMap { internalOptions, CaseMapImpl.TITLECASE_ADJUST_TO_CASED)); } + /** + * Titlecases a string. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * + *

Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. (This can be modified with options bits.) + * + * @param locale The locale ID. Can be null for {@link Locale#getDefault}. + * (See {@link ULocale#toLocale}.) + * @param iter A break iterator to find the first characters of words that are to be titlecased. + * It is set to the source string (setText()) + * and used one or more times for iteration (first() and next()). + * If null, then a word break iterator for the locale is used + * (or something equivalent). + * @param src The original string. + * @return the result string. + * + * @see UCharacter#toUpperCase(Locale, String) + * @draft ICU 60 + * @provisional This API might change or be removed in a future release. + */ + public String apply(Locale locale, BreakIterator iter, CharSequence src) { + if (iter == null && locale == null) { + locale = Locale.getDefault(); + } + iter = CaseMapImpl.getTitleBreakIterator(locale, internalOptions, iter); + iter.setText(src); + return CaseMapImpl.toTitle(getCaseLocale(locale), internalOptions, iter, src); + } + /** * Titlecases a string and optionally records edits (see {@link #omitUnchangedText}). * Casing is locale-dependent and context-sensitive. @@ -321,7 +389,7 @@ public abstract class CaseMap { locale = Locale.getDefault(); } iter = CaseMapImpl.getTitleBreakIterator(locale, internalOptions, iter); - iter.setText(src.toString()); + iter.setText(src); return CaseMapImpl.toTitle( getCaseLocale(locale), internalOptions, iter, src, dest, edits); } @@ -372,13 +440,31 @@ public abstract class CaseMap { } /** - * Case-folds a string and optionally records edits (see {@link #omitUnchangedText}). + * Case-folds a string. + * The result may be longer or shorter than the original. * *

<p>Case-folding is locale-independent and not context-sensitive, * but there is an option for whether to include or exclude mappings for dotted I * and dotless i that are marked with 'T' in CaseFolding.txt. * - *

The result may be longer or shorter than the original. + * @param src The original string. + * @return the result string. + * + * @see UCharacter#foldCase(String, int) + * @draft ICU 60 + * @provisional This API might change or be removed in a future release. + */ + public String apply(CharSequence src) { + return CaseMapImpl.fold(internalOptions, src); + } + + /** + * Case-folds a string and optionally records edits (see {@link #omitUnchangedText}). + * The result may be longer or shorter than the original. + * + *

Case-folding is locale-independent and not context-sensitive, + * but there is an option for whether to include or exclude mappings for dotted I + * and dotless i that are marked with 'T' in CaseFolding.txt. * * @param src The original string. * @param dest A buffer for the result string. Must not be null. diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterCaseTest.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterCaseTest.java index 480c3180aae..c150f3772e5 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterCaseTest.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/lang/UCharacterCaseTest.java @@ -420,14 +420,13 @@ public final class UCharacterCaseTest extends TestFmwk UCharacter.toTitleCase(LOC_DUTCH, "ijssel igloo IJMUIDEN", null)); // Also check the behavior using Java Locale - Locale JAVALOC_DUTCH = new Locale("nl"); assertEquals("Dutch titlecase check in English (Java Locale)", "Ijssel Igloo Ijmuiden", UCharacter.toTitleCase(Locale.ENGLISH, "ijssel igloo IJMUIDEN", null)); assertEquals("Dutch titlecase check in Dutch (Java Locale)", "IJssel Igloo IJmuiden", - UCharacter.toTitleCase(JAVALOC_DUTCH, "ijssel igloo IJMUIDEN", null)); + UCharacter.toTitleCase(DUTCH_LOCALE_, "ijssel igloo IJMUIDEN", null)); iter.setText("ijssel igloo IjMUIdEN iPoD ijenough"); assertEquals("Dutch titlecase check in Dutch with nolowercase option", @@ -1238,7 +1237,7 @@ public final class UCharacterCaseTest extends TestFmwk sb.delete(0, sb.length()); edits.reset(); sb = CaseMap.toTitle().omitUnchangedText().noBreakAdjustment().noLowercase().apply( - new Locale("nl"), null, "IjssEL IglOo", sb, edits); + DUTCH_LOCALE_, null, "IjssEL IglOo", sb, edits); assertEquals("toTitle(IjssEL IglOo)", "J", sb.toString()); EditChange[] titleExpectedChanges = new EditChange[] { new EditChange(false, 1, 1), @@ -1265,6 +1264,32 @@ public final class UCharacterCaseTest extends TestFmwk foldExpectedChanges, true); } + @Test + 
public void TestCaseMapToString() { + // String apply(..., CharSequence) + // Omit unchanged text. + assertEquals("toLower(Istanbul)", "ıb", + CaseMap.toLower().omitUnchangedText().apply(TURKISH_LOCALE_, "IstanBul")); + assertEquals("toUpper(Πατάτα)", "ΑΤΑΤΑ", + CaseMap.toUpper().omitUnchangedText().apply(GREEK_LOCALE_, "Πατάτα")); + assertEquals("toTitle(IjssEL IglOo)", "J", + CaseMap.toTitle().omitUnchangedText().noBreakAdjustment().noLowercase().apply( + DUTCH_LOCALE_, null, "IjssEL IglOo")); + assertEquals("fold(IßtanBul)", "ıssb", + CaseMap.fold().omitUnchangedText().turkic().apply("IßtanBul")); + + // Return the whole result string. + assertEquals("toLower(Istanbul)", "ıstanbul", + CaseMap.toLower().apply(TURKISH_LOCALE_, "IstanBul")); + assertEquals("toUpper(Πατάτα)", "ΠΑΤΑΤΑ", + CaseMap.toUpper().apply(GREEK_LOCALE_, "Πατάτα")); + assertEquals("toTitle(IjssEL IglOo)", "IJssEL IglOo", + CaseMap.toTitle().noBreakAdjustment().noLowercase().apply( + DUTCH_LOCALE_, null, "IjssEL IglOo")); + assertEquals("fold(IßtanBul)", "ısstanbul", + CaseMap.fold().turkic().apply("IßtanBul")); + } + // private data members - test data -------------------------------------- private static final Locale TURKISH_LOCALE_ = new Locale("tr", "TR"); @@ -1272,6 +1297,7 @@ public final class UCharacterCaseTest extends TestFmwk private static final Locale GREEK_LOCALE_ = new Locale("el", "GR"); private static final Locale ENGLISH_LOCALE_ = new Locale("en", "US"); private static final Locale LITHUANIAN_LOCALE_ = new Locale("lt", "LT"); + private static final Locale DUTCH_LOCALE_ = new Locale("nl"); private static final int CHARACTER_UPPER_[] = {0x41, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,