/* titlecasing loop */
while(prev<srcLength) {
/* find next index where to titlecase */
- int32_t idx;
+ int32_t index;
if(isFirstIndex) {
isFirstIndex=FALSE;
- idx=iter->first();
+ index=iter->first();
} else {
- idx=iter->next();
+ index=iter->next();
}
- if(idx==UBRK_DONE || idx>srcLength) {
- idx=srcLength;
+ if(index==UBRK_DONE || index>srcLength) {
+ index=srcLength;
}
/*
* b) first case letter (titlecase) [titleStart..titleLimit[
* c) subsequent characters (lowercase) [titleLimit..index[
*/
- if(prev<idx) {
+ if(prev<index) {
/* find and copy uncased characters [prev..titleStart[ */
int32_t titleStart=prev;
int32_t titleLimit=prev;
UChar32 c;
- U16_NEXT(src, titleLimit, idx, c);
+ U16_NEXT(src, titleLimit, index, c);
if((options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(NULL, c)) {
/* Adjust the titlecasing index (titleStart) to the next cased character. */
for(;;) {
titleStart=titleLimit;
- if(titleLimit==idx) {
+ if(titleLimit==index) {
/*
* only uncased characters in [prev..index[
* stop with titleStart==titleLimit==index
*/
break;
}
- U16_NEXT(src, titleLimit, idx, c);
+ U16_NEXT(src, titleLimit, index, c);
if(UCASE_NONE!=ucase_getType(NULL, c)) {
break; /* cased letter at [titleStart..titleLimit[ */
}
}
/* Special case Dutch IJ titlecasing */
- if (titleStart+1 < idx &&
+ if (titleStart+1 < index &&
caseLocale == UCASE_LOC_DUTCH &&
(src[titleStart] == 0x0049 || src[titleStart] == 0x0069) &&
- (src[titleStart+1] == 0x004A || src[titleStart+1] == 0x006A)) {
+ src[titleStart+1] == 0x006A) {
destIndex=appendUChar(dest, destIndex, destCapacity, 0x004A);
if(destIndex<0) {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
}
/* lowercase [titleLimit..index[ */
- if(titleLimit<idx) {
+ if(titleLimit<index) {
if((options&U_TITLECASE_NO_LOWERCASE)==0) {
/* Normal operation: Lowercase the rest of the word. */
destIndex+=
caseLocale, options, ucase_toFullLower,
dest+destIndex, destCapacity-destIndex,
src, &csc,
- titleLimit, idx,
+ titleLimit, index,
edits, errorCode);
if(errorCode==U_BUFFER_OVERFLOW_ERROR) {
errorCode=U_ZERO_ERROR;
} else {
/* Optionally just copy the rest of the word unchanged. */
destIndex=appendUnchanged(dest, destIndex, destCapacity,
- src+titleLimit, idx-titleLimit, options, edits);
+ src+titleLimit, index-titleLimit, options, edits);
if(destIndex<0) {
errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
return 0;
}
}
- prev=idx;
+ prev=index;
}
return checkOverflowAndEditsError(destIndex, destCapacity, edits, errorCode);
import java.io.IOException;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.Edits;
import com.ibm.icu.util.ICUUncheckedIOException;
}
}
+    /**
+     * Lowercases every code point that {@code iter} still has to deliver and
+     * hands each mapping result to {@code appendResult}.
+     * Shared by {@code toLower} and by the "lowercase the rest of the word"
+     * step of titlecasing.
+     *
+     * @param caseLocale case locale constant passed through to the lowercase mapping
+     * @param options    option bits passed through to {@code appendResult}
+     * @param iter       source iterator; consumed up to its current limit
+     * @param dest       receives the mapped text
+     * @param edits      passed through to {@code appendResult}
+     * @throws IOException if appending to {@code dest} fails
+     */
+    private static void internalToLower(int caseLocale, int options, StringContextIterator iter,
+            Appendable dest, Edits edits) throws IOException {
+        for (int cp = iter.nextCaseMapCP(); cp >= 0; cp = iter.nextCaseMapCP()) {
+            int mapped = UCaseProps.INSTANCE.toFullLower(cp, iter, dest, caseLocale);
+            appendResult(mapped, dest, iter.getCPLength(), options, edits);
+        }
+    }
+
public static <A extends Appendable> A toLower(int caseLocale, int options,
CharSequence src, A dest, Edits edits) {
try {
edits.reset();
}
StringContextIterator iter = new StringContextIterator(src);
- int c;
- while ((c = iter.nextCaseMapCP()) >= 0) {
- c = UCaseProps.INSTANCE.toFullLower(c, iter, dest, caseLocale);
- appendResult(c, dest, iter.getCPLength(), options, edits);
- }
+ internalToLower(caseLocale, options, iter, dest, edits);
return dest;
} catch (IOException e) {
throw new ICUUncheckedIOException(e);
}
}
+    /**
+     * Titlecases {@code src} into {@code dest}, one segment at a time, where the
+     * segments are delimited by the boundaries that {@code titleIter} returns.
+     *
+     * <p>Each segment [prev..index[ is split into leading uncased characters
+     * (copied as-is), the first cased character (titlecased), and the remaining
+     * characters (lowercased, or copied unchanged when
+     * {@code UCharacter.TITLECASE_NO_LOWERCASE} is set). For the Dutch case locale,
+     * a segment-initial i/I followed by j/J yields "IJ".
+     *
+     * @param caseLocale case locale constant, for example {@code UCaseProps.LOC_DUTCH}
+     * @param options    option bits; {@code TITLECASE_NO_BREAK_ADJUSTMENT} and
+     *                   {@code TITLECASE_NO_LOWERCASE} are read here, and the whole
+     *                   value is passed on to the append helpers
+     * @param titleIter  break iterator; only {@code first()} and {@code next()} are
+     *                   called here, so its text must already be set by the caller
+     * @param src        text to titlecase
+     * @param dest       receives the result
+     * @param edits      reset at the start if non-null; may be null
+     * @return {@code dest}
+     * @throws ICUUncheckedIOException wrapping any IOException raised while appending
+     */
+    public static <A extends Appendable> A toTitle(
+            int caseLocale, int options, BreakIterator titleIter,
+            CharSequence src, A dest, Edits edits) {
+        try {
+            if (edits != null) {
+                edits.reset();
+            }
+
+            /* set up local variables */
+            StringContextIterator iter = new StringContextIterator(src);
+            int srcLength = src.length();
+            int prev=0;
+            boolean isFirstIndex=true;
+
+            /* titlecasing loop */
+            while(prev<srcLength) {
+                /* find next index where to titlecase */
+                int index;
+                if(isFirstIndex) {
+                    isFirstIndex=false;
+                    index=titleIter.first();
+                } else {
+                    index=titleIter.next();
+                }
+                if(index==BreakIterator.DONE || index>srcLength) {
+                    index=srcLength;
+                }
+
+                /*
+                 * Unicode 4 & 5 section 3.13 Default Case Operations:
+                 *
+                 * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
+                 * #29, "Text Boundaries." Between each pair of word boundaries, find the first
+                 * cased character F. If F exists, map F to default_title(F); then map each
+                 * subsequent character C to default_lower(C).
+                 *
+                 * In this implementation, segment [prev..index[ into 3 parts:
+                 * a) uncased characters (copy as-is) [prev..titleStart[
+                 * b) first case letter (titlecase) [titleStart..titleLimit[
+                 * c) subsequent characters (lowercase) [titleLimit..index[
+                 */
+                if(prev<index) {
+                    // find and copy uncased characters [prev..titleStart[
+                    int titleStart=prev;
+                    iter.setLimit(index);
+                    int c=iter.nextCaseMapCP();
+                    if((options&UCharacter.TITLECASE_NO_BREAK_ADJUSTMENT)==0
+                            && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {
+                        // Adjust the titlecasing index (titleStart) to the next cased character.
+                        while((c=iter.nextCaseMapCP())>=0
+                                && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {}
+                        // If c<0 then we have only uncased characters in [prev..index[
+                        // and stopped with titleStart==titleLimit==index.
+                        titleStart=iter.getCPStart();
+                        appendUnchanged(src, prev, titleStart-prev, dest, options, edits);
+                    }
+
+                    if(titleStart<index) {
+                        int titleLimit=iter.getCPLimit();
+                        // titlecase c which is from [titleStart..titleLimit[
+                        c = UCaseProps.INSTANCE.toFullTitle(c, iter, dest, caseLocale);
+                        appendResult(c, dest, iter.getCPLength(), options, edits);
+
+                        // Special case Dutch IJ titlecasing
+                        if (titleStart+1 < index && caseLocale == UCaseProps.LOC_DUTCH) {
+                            char c1 = src.charAt(titleStart);
+                            if (c1 == 'i' || c1 == 'I') {
+                                char c2 = src.charAt(titleStart+1);
+                                if (c2 == 'j') {
+                                    // Uppercase the j so that the digraph comes out as "IJ".
+                                    dest.append('J');
+                                    if (edits != null) {
+                                        edits.addReplace(1, 1);
+                                    }
+                                    c=iter.nextCaseMapCP();
+                                    titleLimit++;
+                                    assert c == c2;
+                                    assert titleLimit == iter.getCPLimit();
+                                } else if (c2 == 'J') {
+                                    // Keep an existing capital J from getting lowercased below;
+                                    // it is unchanged text, so record it as such.
+                                    appendUnchanged(src, titleStart+1, 1, dest, options, edits);
+                                    c=iter.nextCaseMapCP();
+                                    titleLimit++;
+                                    assert c == c2;
+                                    assert titleLimit == iter.getCPLimit();
+                                }
+                            }
+                        }
+
+                        // lowercase [titleLimit..index[
+                        if(titleLimit<index) {
+                            if((options&UCharacter.TITLECASE_NO_LOWERCASE)==0) {
+                                // Normal operation: Lowercase the rest of the word.
+                                internalToLower(caseLocale, options, iter, dest, edits);
+                            } else {
+                                // Optionally just copy the rest of the word unchanged.
+                                appendUnchanged(src, titleLimit, index-titleLimit, dest, options, edits);
+                                iter.moveToLimit();
+                            }
+                        }
+                    }
+                }
+
+                prev=index;
+            }
+            return dest;
+        } catch (IOException e) {
+            throw new ICUUncheckedIOException(e);
+        }
+    }
+
private static final class GreekUpper {
// Data bits.
private static final int UPPER_MASK = 0x3ff;
private static final int LOC_TURKISH=2;
private static final int LOC_LITHUANIAN=3;
static final int LOC_GREEK=4;
+ public static final int LOC_DUTCH=5;
public static final int getCaseLocale(Locale locale) {
return getCaseLocale(locale.getLanguage());
return LOC_GREEK;
} else if(language.equals("lt")) {
return LOC_LITHUANIAN;
+ } else if(language.equals("nl")) {
+ return LOC_DUTCH;
}
} else if(language.length()==3) {
if(language.equals("tur") || language.equals("aze")) {
return LOC_GREEK;
} else if(language.equals("lit")) {
return LOC_LITHUANIAN;
+ } else if(language.equals("nld")) {
+ return LOC_DUTCH;
}
}
return LOC_ROOT;
import java.util.Map;
import com.ibm.icu.impl.CaseMapImpl;
-import com.ibm.icu.impl.CaseMapImpl.StringContextIterator;
import com.ibm.icu.impl.IllegalIcuArgumentException;
import com.ibm.icu.impl.Trie2;
import com.ibm.icu.impl.UBiDiProps;
caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits);
return applyEdits(str, replacementChars, edits);
} else {
- return CaseMapImpl.toLower(caseLocale, 0, str, new StringBuilder(), null).toString();
+ return CaseMapImpl.toLower(caseLocale, 0, str,
+ new StringBuilder(str.length()), null).toString();
}
}
caseLocale, CaseMapImpl.OMIT_UNCHANGED_TEXT, str, new StringBuilder(), edits);
return applyEdits(str, replacementChars, edits);
} else {
- return CaseMapImpl.toUpper(caseLocale, 0, str, new StringBuilder(), null).toString();
+ return CaseMapImpl.toUpper(caseLocale, 0, str,
+ new StringBuilder(str.length()), null).toString();
+ }
+ }
+
+ // Titlecases str using the given case locale, option bits and break iterator.
+ // The callers set the iterator's text before calling this. Short strings go through
+ // the edits-based path; longer ones are written straight into a presized builder.
+ // The caller's option bits are forwarded to CaseMapImpl.toTitle on both paths.
+ private static String toTitleCase(int caseLocale, int options, BreakIterator titleIter, String str) {
+ // If the caller already asked for OMIT_UNCHANGED_TEXT, the edits path would undo
+ // that by re-inserting the unchanged text, so take the direct path instead.
+ if (str.length() <= 100 && (options & CaseMapImpl.OMIT_UNCHANGED_TEXT) == 0) {
+ if (str.isEmpty()) {
+ return str;
+ }
+ // Collect and apply only changes.
+ // Good if no or few changes. Bad (slow) if many changes.
+ Edits edits = new Edits();
+ StringBuilder replacementChars = CaseMapImpl.toTitle(
+ caseLocale, options | CaseMapImpl.OMIT_UNCHANGED_TEXT, titleIter, str,
+ new StringBuilder(), edits);
+ return applyEdits(str, replacementChars, edits);
+ } else {
+ return CaseMapImpl.toTitle(caseLocale, options, titleIter, str,
+ new StringBuilder(str.length()), null).toString();
}
}
public static String toTitleCase(Locale locale, String str,
BreakIterator breakiter)
{
- return toTitleCase(ULocale.forLocale(locale), str, breakiter);
+ return toTitleCase(locale, str, breakiter, 0); // delegates to the four-argument Locale overload with no option bits set
}
/**
* @see #TITLECASE_NO_BREAK_ADJUSTMENT
*/
public static String toTitleCase(ULocale locale, String str,
- BreakIterator titleIter,
- int options) {
- StringContextIterator iter = new StringContextIterator(str);
- StringBuilder result = new StringBuilder(str.length());
- int c, nc, srcLength = str.length();
-
- if (locale == null) {
- locale = ULocale.getDefault();
- }
- int caseLocale = UCaseProps.getCaseLocale(locale);
-
+ BreakIterator titleIter, int options) {
+ // The replaced implementation defaulted a null locale before using it.
+ // Keep doing so, so that a null locale with a null titleIter still works.
+ if (locale == null) {
+ locale = ULocale.getDefault();
+ }
if(titleIter == null) {
titleIter = BreakIterator.getWordInstance(locale);
}
titleIter.setText(str);
-
- int prev, titleStart, index;
- boolean isFirstIndex;
- boolean isDutch = locale.getLanguage().equals("nl");
- boolean FirstIJ = true;
-
- /* set up local variables */
- prev=0;
- isFirstIndex=true;
-
- /* titlecasing loop */
- while(prev<srcLength) {
- /* find next index where to titlecase */
- if(isFirstIndex) {
- isFirstIndex=false;
- index=titleIter.first();
- } else {
- index=titleIter.next();
- }
- if(index==BreakIterator.DONE || index>srcLength) {
- index=srcLength;
- }
-
- /*
- * Unicode 4 & 5 section 3.13 Default Case Operations:
- *
- * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
- * #29, "Text Boundaries." Between each pair of word boundaries, find the first
- * cased character F. If F exists, map F to default_title(F); then map each
- * subsequent character C to default_lower(C).
- *
- * In this implementation, segment [prev..index[ into 3 parts:
- * a) uncased characters (copy as-is) [prev..titleStart[
- * b) first case letter (titlecase) [titleStart..titleLimit[
- * c) subsequent characters (lowercase) [titleLimit..index[
- */
- if(prev<index) {
- /* find and copy uncased characters [prev..titleStart[ */
- iter.setLimit(index);
- c=iter.nextCaseMapCP();
- if((options&TITLECASE_NO_BREAK_ADJUSTMENT)==0
- && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {
- while((c=iter.nextCaseMapCP())>=0
- && UCaseProps.NONE==UCaseProps.INSTANCE.getType(c)) {}
- titleStart=iter.getCPStart();
- if(prev<titleStart) {
- result.append(str, prev, titleStart);
- }
- } else {
- titleStart=prev;
- }
-
- if(titleStart<index) {
- FirstIJ = true;
- /* titlecase c which is from titleStart */
- c = UCaseProps.INSTANCE.toFullTitle(c, iter, result, caseLocale);
-
- /* decode the result and lowercase up to index */
- for(;;) {
- if(c<0) {
- /* (not) original code point */
- c=~c;
- result.appendCodePoint(c);
- } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
- /* mapping already appended to result */
- } else {
- /* append single-code point mapping */
- result.appendCodePoint(c);
- }
-
- if((options&TITLECASE_NO_LOWERCASE)!=0) {
- /* Optionally just copy the rest of the word unchanged. */
-
- int titleLimit=iter.getCPLimit();
- if(titleLimit<index) {
- /* Special Case - Dutch IJ Titlecasing */
- if (isDutch && c == 0x0049 && str.charAt(titleLimit) == 'j') {
- result.append('J').append(str, titleLimit + 1, index);
- } else {
- result.append(str, titleLimit, index);
- }
- }
- iter.moveToLimit();
- break;
- } else if((nc=iter.nextCaseMapCP())>=0) {
- if (isDutch && (nc == 0x004A || nc == 0x006A)
- && (c == 0x0049) && (FirstIJ == true)) {
- c = 0x004A; /* J */
- FirstIJ = false;
- } else {
- /* Normal operation: Lowercase the rest of the word. */
- c = UCaseProps.INSTANCE.toFullLower(nc, iter, result, caseLocale);
- }
- } else {
- break;
- }
- }
- }
- }
-
- prev=index;
- }
- return result.toString();
+ // The segment-by-segment titlecasing is done by the private int-caseLocale overload.
+ return toTitleCase(getCaseLocale(locale), options, titleIter, str);
}
public static String toTitleCase(Locale locale, String str,
BreakIterator titleIter,
int options) {
- return toTitleCase(ULocale.forLocale(locale), str, titleIter, options);
+ // This method used to delegate to the ULocale overload, which substituted the
+ // default locale for null. Keep accepting a null locale here.
+ if(locale == null) {
+ locale = Locale.getDefault();
+ }
+ // No iterator supplied: use a word break iterator for the locale.
+ if(titleIter == null) {
+ titleIter = BreakIterator.getWordInstance(locale);
+ }
+ titleIter.setText(str);
+ return toTitleCase(getCaseLocale(locale), options, titleIter, str);
}
/**
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.util.ULocale;
-// TODO: issues/questions
-// - optimizing strategies for unstyled text: stop after number of changes or length of replacement?
-
/**
* Low-level case mapping options and methods. Immutable.
* "Setters" return instances with the union of the current and new options set.
*/
public <A extends Appendable> A apply(
Locale locale, BreakIterator iter, CharSequence src, A dest, Edits edits) {
- return null;
+ if (iter == null) { // no iterator supplied: use a word break iterator for the locale
+ iter = BreakIterator.getWordInstance(locale); // NOTE(review): locale is passed through unchecked; confirm whether a null locale must be supported here
+ }
+ iter.setText(src.toString()); // the iterator is given the text of src before titlecasing
+ return CaseMapImpl.toTitle(
+ getCaseLocale(locale), internalOptions, iter, src, dest, edits);
}
}