import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.text.FilteredBreakIteratorBuilder;
import com.ibm.icu.text.UCharacterIterator;
-import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.util.BytesTrie;
import com.ibm.icu.util.CharsTrie;
import com.ibm.icu.util.CharsTrieBuilder;
private UCharacterIterator text; // TODO(Tom): suffice to move into the local scope in next() ?
private CharsTrie backwardsTrie; // i.e. ".srM" for Mrs.
private CharsTrie forwardsPartialTrie; // Has ".a" for "a.M."
- private UnicodeSet glueSet = null;
/**
* @param adoptBreakIterator
* forward & partial char trie to adopt
* @param backwardsTrie
* backward trie to adopt
- * @param glueSet the glue set to adopt
*/
public SimpleFilteredSentenceBreakIterator(BreakIterator adoptBreakIterator, CharsTrie forwardsPartialTrie,
- CharsTrie backwardsTrie, UnicodeSet glueSet) {
+ CharsTrie backwardsTrie) {
this.delegate = adoptBreakIterator;
this.forwardsPartialTrie = forwardsPartialTrie;
this.backwardsTrie = backwardsTrie;
- if(!glueSet.isEmpty()) {
- this.glueSet = new UnicodeSet(glueSet).freeze(); // copy
- }
}
uch = text.nextCodePoint();
}
- // Check for a glue character
- if(this.glueSet != null && text.getIndex()<text.getLength()) {
- uch = text.nextCodePoint();
- if(glueSet.contains(uch)) {
- // Glued - suppress this break.
- return false; // no break here.
- } else {
- uch = text.previousCodePoint(); // move back again.
- }
- }
-
BytesTrie.Result r = BytesTrie.Result.INTERMEDIATE_VALUE;
while ((uch = text.previousCodePoint()) != UCharacterIterator.DONE && // more to consume backwards and..
static final int SuppressInReverse = (1 << 0);
static final int AddToForward = (1 << 1);
- private UnicodeSet glueSet = new UnicodeSet();
-
public Builder(Locale loc) {
this(ULocale.forLocale(loc));
}
if (fwdCount > 0) {
forwardsPartialTrie = builder2.build(StringTrieBuilder.Option.FAST);
}
- return new SimpleFilteredSentenceBreakIterator(adoptBreakIterator, forwardsPartialTrie, backwardsTrie, glueSet);
- }
-
- /* (non-Javadoc)
- * @see com.ibm.icu.text.FilteredBreakIteratorBuilder#setGlueCharacters(com.ibm.icu.text.UnicodeSet)
- * @internal
- */
- public void setGlueCharacters(UnicodeSet set) {
- if (set == null || set.isEmpty()) {
- glueSet.clear();
- } else {
- glueSet.set(set);
- }
+ return new SimpleFilteredSentenceBreakIterator(adoptBreakIterator, forwardsPartialTrie, backwardsTrie);
}
}
}