From: Markus Scherer Date: Tue, 7 Feb 2017 00:58:52 +0000 (+0000) Subject: ICU-12410 move/re-port titlecasing, support Edits X-Git-Tag: release-59-rc~145^2~9 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4805866163c3aa7c058af63c1ca77c1e1641cf03;p=icu ICU-12410 move/re-port titlecasing, support Edits X-SVN-Rev: 39647 --- diff --git a/icu4c/source/common/ustrcase.cpp b/icu4c/source/common/ustrcase.cpp index 2a44f6eba4c..f08a1fae53f 100644 --- a/icu4c/source/common/ustrcase.cpp +++ b/icu4c/source/common/ustrcase.cpp @@ -577,15 +577,15 @@ ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *it /* titlecasing loop */ while(prevfirst(); + index=iter->first(); } else { - idx=iter->next(); + index=iter->next(); } - if(idx==UBRK_DONE || idx>srcLength) { - idx=srcLength; + if(index==UBRK_DONE || index>srcLength) { + index=srcLength; } /* @@ -601,24 +601,24 @@ ustrcase_internalToTitle(int32_t caseLocale, uint32_t options, BreakIterator *it * b) first case letter (titlecase) [titleStart..titleLimit[ * c) subsequent characters (lowercase) [titleLimit..index[ */ - if(prev= 0) { + c = UCaseProps.INSTANCE.toFullLower(c, iter, dest, caseLocale); + appendResult(c, dest, iter.getCPLength(), options, edits); + } + } + public static A toLower(int caseLocale, int options, CharSequence src, A dest, Edits edits) { try { @@ -198,11 +209,7 @@ public final class CaseMapImpl { edits.reset(); } StringContextIterator iter = new StringContextIterator(src); - int c; - while ((c = iter.nextCaseMapCP()) >= 0) { - c = UCaseProps.INSTANCE.toFullLower(c, iter, dest, caseLocale); - appendResult(c, dest, iter.getCPLength(), options, edits); - } + internalToLower(caseLocale, options, iter, dest, edits); return dest; } catch (IOException e) { throw new ICUUncheckedIOException(e); @@ -230,6 +237,107 @@ public final class CaseMapImpl { } } + public static A toTitle( + int caseLocale, int options, BreakIterator titleIter, + CharSequence src, A dest, Edits edits) { + try { + if (edits != null) { + edits.reset(); + } + + /* set up local variables */ + StringContextIterator iter = new StringContextIterator(src); + int srcLength = src.length(); + int prev=0; + boolean isFirstIndex=true; + + /* titlecasing loop */ + while(prevsrcLength) { + index=srcLength; + } + + /* + * Unicode 4 & 5 section 3.13 Default Case Operations: + * + * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex + * #29, "Text Boundaries." Between each pair of word boundaries, find the first + * cased character F. If F exists, map F to default_title(F); then map each + * subsequent character C to default_lower(C). + * + * In this implementation, segment [prev..index[ into 3 parts: + * a) uncased characters (copy as-is) [prev..titleStart[ + * b) first case letter (titlecase) [titleStart..titleLimit[ + * c) subsequent characters (lowercase) [titleLimit..index[ + */ + if(prev=0 + && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {} + // If c<0 then we have only uncased characters in [prev..index[ + // and stopped with titleStart==titleLimit==index. + titleStart=iter.getCPStart(); + appendUnchanged(src, prev, titleStart-prev, dest, options, edits); + } + + if(titleStartsrcLength) { - index=srcLength; - } - - /* - * Unicode 4 & 5 section 3.13 Default Case Operations: - * - * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex - * #29, "Text Boundaries." Between each pair of word boundaries, find the first - * cased character F. If F exists, map F to default_title(F); then map each - * subsequent character C to default_lower(C). - * - * In this implementation, segment [prev..index[ into 3 parts: - * a) uncased characters (copy as-is) [prev..titleStart[ - * b) first case letter (titlecase) [titleStart..titleLimit[ - * c) subsequent characters (lowercase) [titleLimit..index[ - */ - if(prev=0 - && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {} - titleStart=iter.getCPStart(); - if(prev=0) { - if (isDutch && (nc == 0x004A || nc == 0x006A) - && (c == 0x0049) && (FirstIJ == true)) { - c = 0x004A; /* J */ - FirstIJ = false; - } else { - /* Normal operation: Lowercase the rest of the word. */ - c = UCaseProps.INSTANCE.toFullLower(nc, iter, result, caseLocale); - } - } else { - break; - } - } - } - } - - prev=index; - } - return result.toString(); + return toTitleCase(getCaseLocale(locale), options, titleIter, str); } @@ -5323,7 +5230,11 @@ public final class UCharacter implements ECharacterCategory, ECharacterDirection public static String toTitleCase(Locale locale, String str, BreakIterator titleIter, int options) { - return toTitleCase(ULocale.forLocale(locale), str, titleIter, options); + if(titleIter == null) { + titleIter = BreakIterator.getWordInstance(locale); + } + titleIter.setText(str); + return toTitleCase(getCaseLocale(locale), options, titleIter, str); } /** diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/CaseMap.java b/icu4j/main/classes/core/src/com/ibm/icu/text/CaseMap.java index e1747b635c4..6b0cea0540d 100644 --- a/icu4j/main/classes/core/src/com/ibm/icu/text/CaseMap.java +++ b/icu4j/main/classes/core/src/com/ibm/icu/text/CaseMap.java @@ -9,9 +9,6 @@ import com.ibm.icu.impl.UCaseProps; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.util.ULocale; -// TODO: issues/questions -// - optimizing strategies for unstyled text: stop after number of changes or length of replacement? - /** * Low-level case mapping options and methods. Immutable. * "Setters" return instances with the union of the current and new options set. @@ -262,7 +259,12 @@ public abstract class CaseMap { */ public A apply( Locale locale, BreakIterator iter, CharSequence src, A dest, Edits edits) { - return null; + if (iter == null) { + iter = BreakIterator.getWordInstance(locale); + } + iter.setText(src.toString()); + return CaseMapImpl.toTitle( + getCaseLocale(locale), internalOptions, iter, src, dest, edits); } }