* for each character.
* TODO: Try to re-consolidate one way or another with the non-Greek function.
*/
-int32_t toUpper(int32_t caseLocale, uint32_t options,
+int32_t toUpper(uint32_t options,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
Edits *edits,
}
} else {
const UChar *s;
- c=ucase_toFullUpper(NULL, c, NULL, NULL, &s, caseLocale);
+ c=ucase_toFullUpper(NULL, c, NULL, NULL, &s, UCASE_LOC_GREEK);
destIndex = appendResult(dest, destIndex, destCapacity, c, s,
nextIndex - i, options, edits);
if (destIndex < 0) {
icu::Edits *edits,
UErrorCode &errorCode) {
if (caseLocale == UCASE_LOC_GREEK) {
- return GreekUpper::toUpper(caseLocale, options, dest, destCapacity, src, srcLength, edits, errorCode);
+ return GreekUpper::toUpper(options, dest, destCapacity, src, srcLength, edits, errorCode);
}
UCaseContext csc=UCASECONTEXT_INITIALIZER;
csc.p=(void *)src;
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl;
+import java.io.IOException;
+
+import com.ibm.icu.text.Edits;
+import com.ibm.icu.util.ICUUncheckedIOException;
import com.ibm.icu.util.ULocale;
// TODO: rename to CaseMapImpl
public static final class StringContextIterator implements UCaseProps.ContextIterator {
/**
* Constructor.
- * @param s String to iterate over.
+ * @param src String to iterate over.
*/
- public StringContextIterator(String s) {
- this.s=s;
- limit=s.length();
+ public StringContextIterator(CharSequence src) {
+ this.s=src;
+ limit=src.length();
cpStart=cpLimit=index=0;
dir=0;
}
public int nextCaseMapCP() {
cpStart=cpLimit;
if(cpLimit<limit) {
- int c=s.codePointAt(cpLimit);
+ int c=Character.codePointAt(s, cpLimit);
cpLimit+=Character.charCount(c);
return c;
} else {
return -1; // end of text: must be negative, callers loop while the result is >=0
}
+ /**
+  * Returns the number of chars between cpStart and cpLimit,
+  * the code point boundaries set by nextCaseMapCP().
+  */
+ public int getCPLength() {
+     final int length = cpLimit - cpStart;
+     return length;
+ }
+
// implement UCaseProps.ContextIterator
// The following code is not used anywhere in this private class
@Override
int c;
if(dir>0 && index<s.length()) {
- c=s.codePointAt(index);
+ c=Character.codePointAt(s, index);
index+=Character.charCount(c);
return c;
} else if(dir<0 && index>0) {
- c=s.codePointBefore(index);
+ c=Character.codePointBefore(s, index);
index-=Character.charCount(c);
return c;
}
}
// variables
- protected String s;
+ protected CharSequence s;
protected int index, limit, cpStart, cpLimit;
protected int dir; // 0=initial state >0=forward <0=backward
}
- /** Appends a full case mapping result, see {@link UCaseProps#MAX_STRING_LENGTH}. */
- private static final void appendResult(int c, StringBuilder result) {
+ /**
+  * Appends one code point to an Appendable as UTF-16.
+  *
+  * @param a the destination
+  * @param c the code point to append
+  * @return the number of chars appended: 1 for a BMP code point, 2 for a surrogate pair
+  * @throws IOException if the Appendable throws it
+  */
+ private static int appendCodePoint(Appendable a, int c) throws IOException {
+     if (c > Character.MAX_VALUE) {
+         // Supplementary code point: split into lead and trail surrogates.
+         // 0xd7c0 is the lead surrogate base 0xd800 minus (0x10000 >> 10).
+         char lead = (char)(0xd7c0 + (c >> 10));
+         char trail = (char)(Character.MIN_LOW_SURROGATE + (c & 0x3ff));
+         a.append(lead);
+         a.append(trail);
+         return 2;
+     }
+     a.append((char)c);
+     return 1;
+ }
+
+ /**
+ * Appends a full case mapping result, see {@link UCaseProps#MAX_STRING_LENGTH}.
+ * @throws IOException
+ */
+ private static void appendResult(int result, Appendable dest,
+ int cpLength, int options, Edits edits) throws IOException {
// Decode the result.
- if (c < 0) {
+ if (result < 0) {
// (not) original code point
- result.appendCodePoint(~c);
- } else if (c <= UCaseProps.MAX_STRING_LENGTH) {
+ if (edits != null) {
+ edits.addUnchanged(cpLength);
+ // TODO: remove package path
+ if ((options & com.ibm.icu.text.CaseMap.OMIT_UNCHANGED_TEXT) != 0) {
+ return;
+ }
+ }
+ appendCodePoint(dest, ~result);
+ } else if (result <= UCaseProps.MAX_STRING_LENGTH) {
// The mapping has already been appended to result.
+ if (edits != null) {
+ edits.addReplace(cpLength, result);
+ }
} else {
// Append the single-code point mapping.
- result.appendCodePoint(c);
+ int length = appendCodePoint(dest, result);
+ if (edits != null) {
+ edits.addReplace(cpLength, length);
+ }
}
}
- // TODO: Move the other string case mapping functions from UCharacter to here, too.
-
- public static String toUpper(ULocale locale, String str) {
- if (locale == null) {
- locale = ULocale.getDefault();
+ private static final void appendUnchanged(CharSequence src, int start, int length,
+ Appendable dest, int options, Edits edits) throws IOException {
+ if (length > 0) {
+ if (edits != null) {
+ edits.addUnchanged(length);
+ // TODO: remove package path
+ if ((options & com.ibm.icu.text.CaseMap.OMIT_UNCHANGED_TEXT) != 0) {
+ return;
+ }
+ }
+ dest.append(src, start, start + length);
}
- int[] locCache = new int[] { UCaseProps.getCaseLocale(locale, null) };
- if (locCache[0] == UCaseProps.LOC_GREEK) {
- return GreekUpper.toUpper(str, locCache);
+ }
+
+ public static <A extends Appendable> A toLower(int caseLocale, int options,
+ CharSequence src, A dest, Edits edits) {
+ try {
+ if (edits != null) {
+ edits.reset();
+ }
+ StringContextIterator iter = new StringContextIterator(src);
+ int c;
+ while ((c = iter.nextCaseMapCP()) >= 0) {
+ c = UCaseProps.INSTANCE.toFullLower(c, iter, dest, caseLocale);
+ appendResult(c, dest, iter.getCPLength(), options, edits);
+ }
+ return dest;
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
}
+ }
- StringContextIterator iter = new StringContextIterator(str);
- StringBuilder result = new StringBuilder(str.length());
- int c;
- while((c=iter.nextCaseMapCP())>=0) {
- c = UCaseProps.INSTANCE.toFullUpper(c, iter, result, locale, locCache);
- appendResult(c, result);
+ public static String toUpper(ULocale locale, String str) {
+ try {
+ int options = 0; Edits edits = null; // TODO
+ if (locale == null) {
+ locale = ULocale.getDefault();
+ }
+ int caseLocale = UCaseProps.getCaseLocale(locale);
+ if (caseLocale == UCaseProps.LOC_GREEK) {
+ return GreekUpper.toUpper(str);
+ }
+
+ StringContextIterator iter = new StringContextIterator(str);
+ StringBuilder result = new StringBuilder(str.length());
+ int c;
+ while((c=iter.nextCaseMapCP())>=0) {
+ c = UCaseProps.INSTANCE.toFullUpper(c, iter, result, caseLocale);
+ appendResult(c, result, iter.getCPLength(), options, edits);
+ }
+ return result.toString();
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
}
- return result.toString();
}
private static final class GreekUpper {
* TODO: Try to re-consolidate one way or another with the non-Greek function.
*
* <p>Keep this consistent with the C++ versions in ustrcase.cpp (UTF-16) and ucasemap.cpp (UTF-8).
+ * @throws IOException
*/
- private static String toUpper(CharSequence s, int[] locCache) {
+ private static String toUpper(CharSequence s) throws IOException {
+ int options = 0; Edits edits = null; // TODO
StringBuilder result = new StringBuilder(s.length());
int state = 0;
for (int i = 0; i < s.length();) {
--numYpogegrammeni;
}
} else {
- c = UCaseProps.INSTANCE.toFullUpper(c, null, result, null, locCache);
- appendResult(c, result);
+ c = UCaseProps.INSTANCE.toFullUpper(c, null, result, UCaseProps.LOC_GREEK);
+ appendResult(c, result, nextIndex - i, options, edits);
}
i = nextIndex;
state = nextState;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Iterator;
+import java.util.Locale;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
// read exceptions[]
count=indexes[IX_EXC_LENGTH];
if(count>0) {
- exceptions=ICUBinary.getChars(bytes, count, 0);
+ exceptions=ICUBinary.getString(bytes, count, 0);
}
// read unfold[]
*
* @param excWord (in) initial exceptions word
* @param index (in) desired slot index
- * @param excOffset (in) offset into exceptions[] after excWord=exceptions[excOffset++];
+ * @param excOffset (in) offset into exceptions[] after excWord=exceptions.charAt(excOffset++);
* @return bits 31..0: slot value
* 63..32: modified excOffset, moved to the last char of the value, use +1 for beginning of next slot
*/
long value;
if((excWord&EXC_DOUBLE_SLOTS)==0) {
excOffset+=slotOffset(excWord, index);
- value=exceptions[excOffset];
+ value=exceptions.charAt(excOffset);
} else {
excOffset+=2*slotOffset(excWord, index);
- value=exceptions[excOffset++];
- value=(value<<16)|exceptions[excOffset];
+ value=exceptions.charAt(excOffset++);
+ value=(value<<16)|exceptions.charAt(excOffset);
}
return value |((long)excOffset<<32);
}
int value;
if((excWord&EXC_DOUBLE_SLOTS)==0) {
excOffset+=slotOffset(excWord, index);
- value=exceptions[excOffset];
+ value=exceptions.charAt(excOffset);
} else {
excOffset+=2*slotOffset(excWord, index);
- value=exceptions[excOffset++];
- value=(value<<16)|exceptions[excOffset];
+ value=exceptions.charAt(excOffset++);
+ value=(value<<16)|exceptions.charAt(excOffset);
}
return value;
}
}
} else {
int excOffset=getExceptionsOffset(props);
- int excWord=exceptions[excOffset++];
+ int excWord=exceptions.charAt(excOffset++);
if(hasSlot(excWord, EXC_LOWER)) {
c=getSlotValue(excWord, EXC_LOWER, excOffset);
}
}
} else {
int excOffset=getExceptionsOffset(props);
- int excWord=exceptions[excOffset++];
+ int excWord=exceptions.charAt(excOffset++);
if(hasSlot(excWord, EXC_UPPER)) {
c=getSlotValue(excWord, EXC_UPPER, excOffset);
}
}
} else {
int excOffset=getExceptionsOffset(props);
- int excWord=exceptions[excOffset++];
+ int excWord=exceptions.charAt(excOffset++);
int index;
if(hasSlot(excWord, EXC_TITLE)) {
index=EXC_TITLE;
*/
int excOffset0, excOffset=getExceptionsOffset(props);
int closureOffset;
- int excWord=exceptions[excOffset++];
+ int excWord=exceptions.charAt(excOffset++);
int index, closureLength, fullLength, length;
excOffset0=excOffset;
/* add the full case folding string */
length=fullLength&0xf;
if(length!=0) {
- set.add(new String(exceptions, excOffset, length));
+ set.add(exceptions.substring(excOffset, excOffset+length));
excOffset+=length;
}
}
/* add each code point in the closure string */
- for(index=0; index<closureLength; index+=UTF16.getCharCount(c)) {
- c=UTF16.charAt(exceptions, closureOffset, exceptions.length, index);
+ int limit=closureOffset+closureLength;
+ for(index=closureOffset; index<limit; index+=UTF16.getCharCount(c)) {
+ c=exceptions.codePointAt(index);
set.add(c);
}
}
if(!propsHasException(props)) {
return props&DOT_MASK;
} else {
- return (exceptions[getExceptionsOffset(props)]>>EXC_DOT_SHIFT)&DOT_MASK;
+ return (exceptions.charAt(getExceptionsOffset(props))>>EXC_DOT_SHIFT)&DOT_MASK;
}
}
*/
public static final int MAX_STRING_LENGTH=0x1f;
- private static final int LOC_UNKNOWN=0;
- private static final int LOC_ROOT=1;
+ // private static final int LOC_UNKNOWN=0;  // no longer used
+ public static final int LOC_ROOT=1;
private static final int LOC_TURKISH=2;
private static final int LOC_LITHUANIAN=3;
static final int LOC_GREEK=4;
- /*
- * Checks and caches the type of locale ID as it is relevant for case mapping.
- * If the locCache is not null, then it must be initialized with locCache[0]=0 .
- */
- static final int getCaseLocale(ULocale locale, int[] locCache) {
- int result;
-
- if(locCache!=null && (result=locCache[0])!=LOC_UNKNOWN) {
- return result;
- }
-
- result=LOC_ROOT;
-
- String language=locale.getLanguage();
- if(language.equals("tr") || language.equals("tur") || language.equals("az") || language.equals("aze")) {
- result=LOC_TURKISH;
- } else if(language.equals("el") || language.equals("ell")) {
- result=LOC_GREEK;
- } else if(language.equals("lt") || language.equals("lit")) {
- result=LOC_LITHUANIAN;
- }
-
- if(locCache!=null) {
- locCache[0]=result;
+ public static final int getCaseLocale(Locale locale) {
+ return getCaseLocale(locale.getLanguage());
+ }
+ public static final int getCaseLocale(ULocale locale) {
+ return getCaseLocale(locale.getLanguage());
+ }
+ /** Accepts both 2- and 3-letter language subtags. */
+ private static final int getCaseLocale(String language) {
+ // Check the subtag length to reduce the number of comparisons
+ // for locales without special behavior.
+ // Fastpath for English "en" which is often used for default (=root locale) case mappings,
+ // and for Chinese "zh": Very common but no special case mapping behavior.
+ if(language.length()==2) {
+ if(language.equals("en") || language.charAt(0)>'t') {
+ return LOC_ROOT;
+ } else if(language.equals("tr") || language.equals("az")) {
+ return LOC_TURKISH;
+ } else if(language.equals("el")) {
+ return LOC_GREEK;
+ } else if(language.equals("lt")) {
+ return LOC_LITHUANIAN;
+ }
+ } else if(language.length()==3) {
+ if(language.equals("tur") || language.equals("aze")) {
+ return LOC_TURKISH;
+ } else if(language.equals("ell")) {
+ return LOC_GREEK;
+ } else if(language.equals("lit")) {
+ return LOC_LITHUANIAN;
+ }
}
- return result;
+ return LOC_ROOT;
}
/* Is followed by {case-ignorable}* cased ? (dir determines looking forward/backward) */
* See ContextIterator for details.
* If iter==null then a context-independent result is returned.
* @param out If the mapping result is a string, then it is appended to out.
- * @param locale Locale ID for locale-dependent mappings.
- * @param locCache Initialize locCache[0] to 0; may be used to cache the result of parsing
- * the locale ID for subsequent calls.
- * Can be null.
+ * @param caseLocale Case locale value from getCaseLocale().
* @return Output code point or string length, see MAX_STRING_LENGTH.
*
* @see ContextIterator
* @see #MAX_STRING_LENGTH
* @internal
*/
- public final int toFullLower(int c, ContextIterator iter,
- StringBuilder out,
- ULocale locale, int[] locCache) {
+ public final int toFullLower(int c, ContextIterator iter, Appendable out, int caseLocale) {
int result, props;
result=c;
}
} else {
int excOffset=getExceptionsOffset(props), excOffset2;
- int excWord=exceptions[excOffset++];
+ int excWord=exceptions.charAt(excOffset++);
int full;
excOffset2=excOffset;
if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
/* use hardcoded conditions and mappings */
- int loc=getCaseLocale(locale, locCache);
-
/*
* Test for conditional mappings first
* (otherwise the unconditional default mappings are always taken),
* then test for characters that have unconditional mappings in SpecialCasing.txt,
* then get the UnicodeData.txt mappings.
*/
- if( loc==LOC_LITHUANIAN &&
+ if( caseLocale==LOC_LITHUANIAN &&
/* base characters, find accents above */
(((c==0x49 || c==0x4a || c==0x12e) &&
isFollowedByMoreAbove(iter)) ||
00CD; 0069 0307 0301; 00CD; 00CD; lt; # LATIN CAPITAL LETTER I WITH ACUTE
0128; 0069 0307 0303; 0128; 0128; lt; # LATIN CAPITAL LETTER I WITH TILDE
*/
- switch(c) {
- case 0x49: /* LATIN CAPITAL LETTER I */
- out.append(iDot);
- return 2;
- case 0x4a: /* LATIN CAPITAL LETTER J */
- out.append(jDot);
- return 2;
- case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
- out.append(iOgonekDot);
- return 2;
- case 0xcc: /* LATIN CAPITAL LETTER I WITH GRAVE */
- out.append(iDotGrave);
- return 3;
- case 0xcd: /* LATIN CAPITAL LETTER I WITH ACUTE */
- out.append(iDotAcute);
- return 3;
- case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
- out.append(iDotTilde);
- return 3;
- default:
- return 0; /* will not occur */
+ try {
+ switch(c) {
+ case 0x49: /* LATIN CAPITAL LETTER I */
+ out.append(iDot);
+ return 2;
+ case 0x4a: /* LATIN CAPITAL LETTER J */
+ out.append(jDot);
+ return 2;
+ case 0x12e: /* LATIN CAPITAL LETTER I WITH OGONEK */
+ out.append(iOgonekDot);
+ return 2;
+ case 0xcc: /* LATIN CAPITAL LETTER I WITH GRAVE */
+ out.append(iDotGrave);
+ return 3;
+ case 0xcd: /* LATIN CAPITAL LETTER I WITH ACUTE */
+ out.append(iDotAcute);
+ return 3;
+ case 0x128: /* LATIN CAPITAL LETTER I WITH TILDE */
+ out.append(iDotTilde);
+ return 3;
+ default:
+ return 0; /* will not occur */
+ }
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
}
/* # Turkish and Azeri */
- } else if(loc==LOC_TURKISH && c==0x130) {
+ } else if(caseLocale==LOC_TURKISH && c==0x130) {
/*
# I and i-dotless; I-dot and i are case pairs in Turkish and Azeri
# The following rules handle those cases.
0130; 0069; 0130; 0130; az # LATIN CAPITAL LETTER I WITH DOT ABOVE
*/
return 0x69;
- } else if(loc==LOC_TURKISH && c==0x307 && isPrecededBy_I(iter)) {
+ } else if(caseLocale==LOC_TURKISH && c==0x307 && isPrecededBy_I(iter)) {
/*
# When lowercasing, remove dot_above in the sequence I + dot_above, which will turn into i.
# This matches the behavior of the canonically equivalent I-dot_above
0307; ; 0307; 0307; az After_I; # COMBINING DOT ABOVE
*/
return 0; /* remove the dot (continue without output) */
- } else if(loc==LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter)) {
+ } else if(caseLocale==LOC_TURKISH && c==0x49 && !isFollowedByDotAbove(iter)) {
/*
# When lowercasing, unless an I is before a dot_above, it turns into a dotless i.
0130; 0069 0307; 0130; 0130; # LATIN CAPITAL LETTER I WITH DOT ABOVE
*/
- out.append(iDot);
- return 2;
+ try {
+ out.append(iDot);
+ return 2;
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
} else if( c==0x3a3 &&
!isFollowedByCasedLetter(iter, 1) &&
isFollowedByCasedLetter(iter, -1) /* -1=preceded */
/* start of full case mapping strings */
excOffset=(int)(value>>32)+1;
- /* set the output pointer to the lowercase mapping */
- out.append(exceptions, excOffset, full);
+ try {
+ // append the lowercase mapping
+ out.append(exceptions, excOffset, excOffset+full);
- /* return the string length */
- return full;
+ /* return the string length */
+ return full;
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
}
}
/* internal */
private final int toUpperOrTitle(int c, ContextIterator iter,
- StringBuilder out,
- ULocale locale, int[] locCache,
+ Appendable out,
+ int loc,
boolean upperNotTitle) {
int result;
int props;
}
} else {
int excOffset=getExceptionsOffset(props), excOffset2;
- int excWord=exceptions[excOffset++];
+ int excWord=exceptions.charAt(excOffset++);
int full, index;
excOffset2=excOffset;
if((excWord&EXC_CONDITIONAL_SPECIAL)!=0) {
/* use hardcoded conditions and mappings */
- int loc=getCaseLocale(locale, locCache);
-
if(loc==LOC_TURKISH && c==0x69) {
/*
# Turkish and Azeri
}
if(full!=0) {
- /* set the output pointer to the result string */
- out.append(exceptions, excOffset, full);
-
- /* return the string length */
- return full;
+ try {
+ // append the result string
+ out.append(exceptions, excOffset, excOffset+full);
+
+ /* return the string length */
+ return full;
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
}
}
}
public final int toFullUpper(int c, ContextIterator iter,
- StringBuilder out,
- ULocale locale, int[] locCache) {
- return toUpperOrTitle(c, iter, out, locale, locCache, true);
+ Appendable out,
+ int caseLocale) {
+ return toUpperOrTitle(c, iter, out, caseLocale, true);
}
public final int toFullTitle(int c, ContextIterator iter,
- StringBuilder out,
- ULocale locale, int[] locCache) {
- return toUpperOrTitle(c, iter, out, locale, locCache, false);
+ Appendable out,
+ int caseLocale) {
+ return toUpperOrTitle(c, iter, out, caseLocale, false);
}
/* case folding ------------------------------------------------------------- */
}
} else {
int excOffset=getExceptionsOffset(props);
- int excWord=exceptions[excOffset++];
+ int excWord=exceptions.charAt(excOffset++);
int index;
if((excWord&EXC_CONDITIONAL_FOLD)!=0) {
/* special case folding mappings, hardcoded */
* together in a way that they still fold to common result strings.
*/
- public final int toFullFolding(int c, StringBuilder out, int options) {
+ public final int toFullFolding(int c, Appendable out, int options) {
int result;
int props;
}
} else {
int excOffset=getExceptionsOffset(props), excOffset2;
- int excWord=exceptions[excOffset++];
+ int excWord=exceptions.charAt(excOffset++);
int full, index;
excOffset2=excOffset;
return 0x69;
} else if(c==0x130) {
/* 0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE */
- out.append(iDot);
- return 2;
+ try {
+ out.append(iDot);
+ return 2;
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
}
} else {
/* Turkic mappings */
full=(full>>4)&0xf;
if(full!=0) {
- /* set the output pointer to the result string */
- out.append(exceptions, excOffset, full);
-
- /* return the string length */
- return full;
+ try {
+ // append the result string
+ out.append(exceptions, excOffset, excOffset+full);
+
+ /* return the string length */
+ return full;
+ } catch (IOException e) {
+ throw new ICUUncheckedIOException(e);
+ }
}
}
/* case mapping properties API ---------------------------------------------- */
- private static final int[] rootLocCache = { LOC_ROOT };
/*
* We need a StringBuilder for multi-code point output from the
* full case mapping functions. However, we do not actually use that output,
*/
case UProperty.CHANGES_WHEN_LOWERCASED:
dummyStringBuilder.setLength(0);
- return toFullLower(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
+ return toFullLower(c, null, dummyStringBuilder, LOC_ROOT)>=0;
case UProperty.CHANGES_WHEN_UPPERCASED:
dummyStringBuilder.setLength(0);
- return toFullUpper(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
+ return toFullUpper(c, null, dummyStringBuilder, LOC_ROOT)>=0;
case UProperty.CHANGES_WHEN_TITLECASED:
dummyStringBuilder.setLength(0);
- return toFullTitle(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
+ return toFullTitle(c, null, dummyStringBuilder, LOC_ROOT)>=0;
/* case UProperty.CHANGES_WHEN_CASEFOLDED: -- in UCharacterProperty.java */
case UProperty.CHANGES_WHEN_CASEMAPPED:
dummyStringBuilder.setLength(0);
return
- toFullLower(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0 ||
- toFullUpper(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0 ||
- toFullTitle(c, null, dummyStringBuilder, ULocale.ROOT, rootLocCache)>=0;
+ toFullLower(c, null, dummyStringBuilder, LOC_ROOT)>=0 ||
+ toFullUpper(c, null, dummyStringBuilder, LOC_ROOT)>=0 ||
+ toFullTitle(c, null, dummyStringBuilder, LOC_ROOT)>=0;
default:
return false;
}
// data members -------------------------------------------------------- ***
private int indexes[];
- private char exceptions[];
+ private String exceptions;
private char unfold[];
private Trie2_16 trie;
import com.ibm.icu.lang.UCharacterEnums.ECharacterCategory;
import com.ibm.icu.lang.UCharacterEnums.ECharacterDirection;
import com.ibm.icu.text.BreakIterator;
+import com.ibm.icu.text.Edits;
import com.ibm.icu.text.Normalizer2;
import com.ibm.icu.util.RangeValueIterator;
import com.ibm.icu.util.ULocale;
* @stable ICU 3.2
*/
public static String toLowerCase(ULocale locale, String str) {
- StringContextIterator iter = new StringContextIterator(str);
- StringBuilder result = new StringBuilder(str.length());
- int[] locCache = new int[1];
- int c;
-
- if (locale == null) {
- locale = ULocale.getDefault();
+ // TODO: remove package path
+ if (str.length() <= 100) {
+ if (str.isEmpty()) {
+ return str;
+ }
+ // Collect and apply only changes.
+ // Good if no or few changes.
+ // Bad (slow) if many changes.
+ Edits edits = new Edits();
+ StringBuilder replacementChars = com.ibm.icu.text.CaseMap.toLower(
+ locale, com.ibm.icu.text.CaseMap.OMIT_UNCHANGED_TEXT, str,
+ new StringBuilder(), edits);
+ return applyEdits(str, replacementChars, edits);
+ } else {
+ return com.ibm.icu.text.CaseMap.toLower(locale, 0, str, new StringBuilder(), null).toString();
}
- locCache[0]=0;
-
- while((c=iter.nextCaseMapCP())>=0) {
- c = UCaseProps.INSTANCE.toFullLower(c, iter, result, locale, locCache);
+ }
- /* decode the result */
- if(c<0) {
- /* (not) original code point */
- c=~c;
- } else if(c<=UCaseProps.MAX_STRING_LENGTH) {
- /* mapping already appended to result */
- continue;
- /* } else { append single-code point mapping */
+ private static String applyEdits(String str, StringBuilder replacementChars, Edits edits) {
+ if (!edits.hasChanges()) {
+ return str;
+ }
+ StringBuilder result = new StringBuilder(str.length() + edits.lengthDelta());
+ for (Edits.Iterator ei = edits.getCoarseIterator(); ei.next();) {
+ if (ei.hasChange()) {
+ int i = ei.replacementIndex();
+ result.append(replacementChars, i, i + ei.newLength());
+ } else {
+ int i = ei.sourceIndex();
+ result.append(str, i, i + ei.oldLength());
}
- result.appendCodePoint(c);
}
return result.toString();
}
int options) {
StringContextIterator iter = new StringContextIterator(str);
StringBuilder result = new StringBuilder(str.length());
- int[] locCache = new int[1];
int c, nc, srcLength = str.length();
if (locale == null) {
locale = ULocale.getDefault();
}
- locCache[0]=0;
+ int caseLocale = UCaseProps.getCaseLocale(locale);
if(titleIter == null) {
titleIter = BreakIterator.getWordInstance(locale);
if(titleStart<index) {
FirstIJ = true;
/* titlecase c which is from titleStart */
- c = UCaseProps.INSTANCE.toFullTitle(c, iter, result, locale, locCache);
+ c = UCaseProps.INSTANCE.toFullTitle(c, iter, result, caseLocale);
/* decode the result and lowercase up to index */
for(;;) {
FirstIJ = false;
} else {
/* Normal operation: Lowercase the rest of the word. */
- c = UCaseProps.INSTANCE.toFullLower(nc, iter, result, locale,
- locCache);
+ c = UCaseProps.INSTANCE.toFullLower(nc, iter, result, caseLocale);
}
} else {
break;
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.text;
+import com.ibm.icu.impl.UCaseProps;
+import com.ibm.icu.lang.UCharacter;
+import com.ibm.icu.util.ULocale;
+
+// TODO: issues/questions
+// - add java.util.Locale overloads when signatures are settled
+// - optimizing strategies for unstyled text: stop after number of changes or length of replacement?
+
/**
+ * Low-level case mapping functions.
+ *
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
public final class CaseMap {
+ /**
+ * Omit unchanged text when case-mapping with Edits.
+ *
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static final int OMIT_UNCHANGED_TEXT = 0x4000;
+
+ /**
+ * Lowercases a string and optionally records edits.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ *
+ * @param locale The locale ID.
+ * @param options Options bit set, usually 0. See {@link #OMIT_UNCHANGED_TEXT}.
+ * @param src The original string.
+ * @param dest A buffer for the result string. Must not be null.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * This function calls edits.reset() first. edits can be null.
+ * @return dest with the result string (or only changes) appended.
+ *
+ * @see UCharacter#toLowerCase(ULocale, String)
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static <A extends Appendable> A toLower(
+         ULocale locale, int options, CharSequence src, A dest, Edits edits) {
+     // A null locale selects the default locale.
+     ULocale effectiveLocale = (locale != null) ? locale : ULocale.getDefault();
+     // TODO: remove package path
+     return com.ibm.icu.impl.CaseMap.toLower(
+             UCaseProps.getCaseLocale(effectiveLocale), options, src, dest, edits);
+ }
+
+ /**
+ * Uppercases a string and optionally records edits.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ *
+ * @param locale The locale ID.
+ * @param options Options bit set, usually 0. See {@link #OMIT_UNCHANGED_TEXT}.
+ * @param src The original string.
+ * @param dest A buffer for the result string. Must not be null.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * This function calls edits.reset() first. edits can be null.
+ * @return dest with the result string (or only changes) appended.
+ *
+ * @see UCharacter#toUpperCase(ULocale, String)
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static <A extends Appendable> A toUpper(
+ ULocale locale, int options, CharSequence src, A dest, Edits edits) {
+ return null;
+ }
+
+ /**
+ * Titlecases a string and optionally records edits.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ *
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with options bits.)
+ *
+ * @param locale The locale ID.
+ * @param options Options bit set, usually 0. See {@link #OMIT_UNCHANGED_TEXT},
+ * {@link UCharacter#TITLECASE_NO_LOWERCASE},
+ * {@link UCharacter#TITLECASE_NO_BREAK_ADJUSTMENT}.
+ * @param iter A break iterator to find the first characters of words that are to be titlecased.
+ * It is set to the source string (setText())
+ * and used one or more times for iteration (first() and next()).
+ * If null, then a word break iterator for the locale is used
+ * (or something equivalent).
+ * @param src The original string.
+ * @param dest A buffer for the result string. Must not be null.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * This function calls edits.reset() first. edits can be null.
+ * @return dest with the result string (or only changes) appended.
+ *
+ * @see UCharacter#toTitleCase(ULocale, String, BreakIterator, int)
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static <A extends Appendable> A toTitle(
+ ULocale locale, int options, BreakIterator iter,
+ CharSequence src, A dest, Edits edits) {
+ return null;
+ }
+ /**
+ * Case-folds a string and optionally records edits.
+ *
+ * Case-folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'T' in CaseFolding.txt.
+ *
+ * The result may be longer or shorter than the original.
+ *
+ * @param options Options bit set, usually 0. See {@link #OMIT_UNCHANGED_TEXT},
+ * {@link UCharacter#FOLD_CASE_DEFAULT},
+ * {@link UCharacter#FOLD_CASE_EXCLUDE_SPECIAL_I}.
+ * @param src The original string.
+ * @param dest A buffer for the result string. Must not be null.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * This function calls edits.reset() first. edits can be null.
+ * @return dest with the result string (or only changes) appended.
+ *
+ * @see UCharacter#foldCase(String, int)
+ * @draft ICU 59
+ * @provisional This API might change or be removed in a future release.
+ */
+ public static <A extends Appendable> A foldCase(
+ int options, CharSequence src, A dest, Edits edits) {
+ return null;
+ }
}
* Supports replacements, insertions, deletions in linear progression.
* Does not support moving/reordering of text.
*
- * An Edits object tracks a separate UErrorCode, but ICU string transformation functions
- * (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
- *
* @draft ICU 59
* @provisional This API might change or be removed in a future release.
*/
int n = getRangeCount();
int result;
StringBuilder full = new StringBuilder();
- int locCache[] = new int[1];
for (int i=0; i<n; ++i) {
int start = getRangeStart(i);
// add case mappings
// (does not add long s for regular s, or Kelvin for k, for example)
for (int cp=start; cp<=end; ++cp) {
- result = csp.toFullLower(cp, null, full, root, locCache);
+ result = csp.toFullLower(cp, null, full, UCaseProps.LOC_ROOT);
addCaseMapping(foldSet, result, full);
- result = csp.toFullTitle(cp, null, full, root, locCache);
+ result = csp.toFullTitle(cp, null, full, UCaseProps.LOC_ROOT);
addCaseMapping(foldSet, result, full);
- result = csp.toFullUpper(cp, null, full, root, locCache);
+ result = csp.toFullUpper(cp, null, full, UCaseProps.LOC_ROOT);
addCaseMapping(foldSet, result, full);
result = csp.toFullFolding(cp, full, 0);
} else {
BreakIterator bi = BreakIterator.getWordInstance(root);
for (String str : strings) {
+ // TODO: call lower-level functions
foldSet.add(UCharacter.toLowerCase(root, str));
foldSet.add(UCharacter.toTitleCase(root, str, bi));
foldSet.add(UCharacter.toUpperCase(root, str));
private final UCaseProps csp;
private ReplaceableContextIterator iter;
private StringBuilder result;
- private int[] locCache;
+ private int caseLocale;
/**
* Constructs a transliterator.
csp=UCaseProps.INSTANCE;
iter=new ReplaceableContextIterator();
result = new StringBuilder();
- locCache = new int[1];
- locCache[0]=0;
+ caseLocale = UCaseProps.getCaseLocale(locale);
}
/**
iter.setLimit(offsets.limit);
iter.setContextLimits(offsets.contextStart, offsets.contextLimit);
while((c=iter.nextCaseMapCP())>=0) {
- c=csp.toFullLower(c, iter, result, locale, locCache);
+ c=csp.toFullLower(c, iter, result, caseLocale);
if(iter.didReachLimit() && isIncremental) {
// the case mapping function tried to look beyond the context limit
private final UCaseProps csp;
private ReplaceableContextIterator iter;
private StringBuilder result;
- private int[] locCache;
+ private int caseLocale;
/**
* Constructs a transliterator.
csp=UCaseProps.INSTANCE;
iter=new ReplaceableContextIterator();
result = new StringBuilder();
- locCache = new int[1];
- locCache[0]=0;
+ caseLocale = UCaseProps.getCaseLocale(locale);
}
/**
type=csp.getTypeOrIgnorable(c);
if(type>=0) { // not case-ignorable
if(doTitle) {
- c=csp.toFullTitle(c, iter, result, locale, locCache);
+ c=csp.toFullTitle(c, iter, result, caseLocale);
} else {
- c=csp.toFullLower(c, iter, result, locale, locCache);
+ c=csp.toFullLower(c, iter, result, caseLocale);
}
doTitle = type==0; // doTitle=isUncased
private final UCaseProps csp;
private ReplaceableContextIterator iter;
private StringBuilder result;
- private int[] locCache;
+ private int caseLocale;
/**
* Constructs a transliterator.
csp=UCaseProps.INSTANCE;
iter=new ReplaceableContextIterator();
result = new StringBuilder();
- locCache = new int[1];
- locCache[0]=0;
+ caseLocale = UCaseProps.getCaseLocale(locale);
}
/**
iter.setLimit(offsets.limit);
iter.setContextLimits(offsets.contextStart, offsets.contextLimit);
while((c=iter.nextCaseMapCP())>=0) {
- c=csp.toFullUpper(c, iter, result, locale, locCache);
+ c=csp.toFullUpper(c, iter, result, caseLocale);
if(iter.didReachLimit() && isIncremental) {
// the case mapping function tried to look beyond the context limit