case UScript.KATAKANA:
case UScript.HIRAGANA:
case UScript.HAN:
- if (getBreakType() == KIND_WORD)
+ if (getBreakType() == KIND_WORD) {
eng = new CjkBreakEngine(false);
+ }
+ else {
+ fUnhandledBreakEngine.handleChar(c, getBreakType());
+ eng = fUnhandledBreakEngine;
+ }
break;
case UScript.HANGUL:
- if (getBreakType() == KIND_WORD)
+ if (getBreakType() == KIND_WORD) {
eng = new CjkBreakEngine(true);
+ } else {
+ fUnhandledBreakEngine.handleChar(c, getBreakType());
+ eng = fUnhandledBreakEngine;
+ }
break;
default:
fUnhandledBreakEngine.handleChar(c, getBreakType());
fOtherSet.removeAll(fExtendSet);
fOtherSet.removeAll(fExtendNumLetSet);
// Inhibit dictionary characters from being tested at all.
- fOtherSet.removeAll(new UnicodeSet("[\\p{LineBreak = Complex_Context}]"));
+ // Remove surrogates so as not to generate higher CJK characters.
+ fOtherSet.removeAll(new UnicodeSet("[[\\p{LineBreak = Complex_Context}][:Line_Break=Surrogate:]]"));
fOtherSet.removeAll(fDictionaryCjkSet);
fSets = new ArrayList();
fSets.add(fWJ);
fSets.add(fSA);
fSets.add(fSG);
-
}
void setText(StringBuffer s) {