granicus.if.org Git - icu/commitdiff
ICU-10688 remove break type dependency from dictionaries in break iterators.
author Andy Heninger <andy.heninger@gmail.com>
Mon, 4 Dec 2017 19:27:48 +0000 (19:27 +0000)
committer Andy Heninger <andy.heninger@gmail.com>
Mon, 4 Dec 2017 19:27:48 +0000 (19:27 +0000)
X-SVN-Rev: 40688

icu4j/main/classes/core/src/com/ibm/icu/text/BurmeseBreakEngine.java
icu4j/main/classes/core/src/com/ibm/icu/text/CjkBreakEngine.java
icu4j/main/classes/core/src/com/ibm/icu/text/DictionaryBreakEngine.java
icu4j/main/classes/core/src/com/ibm/icu/text/KhmerBreakEngine.java
icu4j/main/classes/core/src/com/ibm/icu/text/LaoBreakEngine.java
icu4j/main/classes/core/src/com/ibm/icu/text/ThaiBreakEngine.java

index efae6f7da31596dc9400612cfb247ecd664051ac..6dc682a7d49a597399e6489faedf884c42e3568c 100644 (file)
@@ -61,7 +61,6 @@ class BurmeseBreakEngine extends DictionaryBreakEngine {
     }
 
     public BurmeseBreakEngine() throws IOException {
-        super(BreakIterator.KIND_WORD, BreakIterator.KIND_LINE);
         setCharacters(fBurmeseWordSet);
         // Initialize dictionary
         fDictionary = DictionaryData.loadDictionaryFor("Mymr");
index b2c4c61b7fb25e61f0d330537beeb9c7c248fa7c..9ae2992c66d779cea94d22994a9d7b4c9a0ad131 100644 (file)
@@ -38,7 +38,6 @@ class CjkBreakEngine extends DictionaryBreakEngine {
     private DictionaryMatcher fDictionary = null;
 
     public CjkBreakEngine(boolean korean) throws IOException {
-        super(BreakIterator.KIND_WORD);
         fDictionary = DictionaryData.loadDictionaryFor("Hira");
         if (korean) {
             setCharacters(fHangulWordSet);
index dea25a108b00a256f2fbb04771d6db83ddcea29d..76db7669ea61e5c9a4dcc9ab5501c833100af8c4 100644 (file)
@@ -169,16 +169,11 @@ abstract class DictionaryBreakEngine implements LanguageBreakEngine {
     }
 
     UnicodeSet fSet = new UnicodeSet();
-    private BitSet fTypes = new BitSet(32);
 
     /**
-     * @param breakTypes The types of break iterators that can use this engine.
-     *  For example, BreakIterator.KIND_LINE
+     *  Constructor
      */
-    public DictionaryBreakEngine(Integer... breakTypes) {
-        for (Integer type: breakTypes) {
-            fTypes.set(type);
-        }
+    public DictionaryBreakEngine() {
     }
 
     @Override
index 7c8926c982f0cd956df9894260ff586791ef3cf2..f2a3a46cc0f9b572edc05e0be6fc14acdbcc1cf8 100644 (file)
@@ -16,7 +16,7 @@ import com.ibm.icu.lang.UProperty;
 import com.ibm.icu.lang.UScript;
 
 class KhmerBreakEngine extends DictionaryBreakEngine {
-    
+
     // Constants for KhmerBreakIterator
     // How many words in a row are "good enough"?
     private static final byte KHMER_LOOKAHEAD = 3;
@@ -29,14 +29,14 @@ class KhmerBreakEngine extends DictionaryBreakEngine {
     private static final byte KHMER_MIN_WORD = 2;
     // Minimum number of characters for two words
     private static final byte KHMER_MIN_WORD_SPAN = KHMER_MIN_WORD * 2;
-    
-    
+
+
     private DictionaryMatcher fDictionary;
     private static UnicodeSet fKhmerWordSet;
     private static UnicodeSet fEndWordSet;
     private static UnicodeSet fBeginWordSet;
     private static UnicodeSet fMarkSet;
-    
+
     static {
         // Initialize UnicodeSets
         fKhmerWordSet = new UnicodeSet();
@@ -56,42 +56,42 @@ class KhmerBreakEngine extends DictionaryBreakEngine {
         fMarkSet.compact();
         fEndWordSet.compact();
         fBeginWordSet.compact();
-        
+
         // Freeze the static UnicodeSet
         fKhmerWordSet.freeze();
         fMarkSet.freeze();
         fEndWordSet.freeze();
         fBeginWordSet.freeze();
     }
-    
+
     public KhmerBreakEngine() throws IOException {
-        super(BreakIterator.KIND_WORD, BreakIterator.KIND_LINE);
         setCharacters(fKhmerWordSet);
         // Initialize dictionary
         fDictionary = DictionaryData.loadDictionaryFor("Khmr");
     }
 
+    @Override
     public boolean equals(Object obj) {
         // Normally is a singleton, but it's possible to have duplicates
         //   during initialization. All are equivalent.
         return obj instanceof KhmerBreakEngine;
     }
 
+    @Override
     public int hashCode() {
         return getClass().hashCode();
     }
-    public boolean handles(int c, int breakType) {
-        if (breakType == BreakIterator.KIND_WORD || breakType == BreakIterator.KIND_LINE) {
-            int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
-            return (script == UScript.KHMER);
-        }
-        return false;
+
+    @Override
+    public boolean handles(int c) {
+        int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
+        return (script == UScript.KHMER);
     }
 
-    public int divideUpDictionaryRange(CharacterIterator fIter, int rangeStart, int rangeEnd, 
+    @Override
+    public int divideUpDictionaryRange(CharacterIterator fIter, int rangeStart, int rangeEnd,
             DequeI foundBreaks) {
-               
+
         if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) {
             return 0;  // Not enough characters for word
         }
@@ -163,7 +163,7 @@ class KhmerBreakEngine extends DictionaryBreakEngine {
                 // no preceding word, or the non-word shares less than the minimum threshold
                 // of characters with a dictionary word, then scan to resynchronize
                 if (words[wordsFound%KHMER_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) <= 0 &&
-                        (wordLength == 0 || 
+                        (wordLength == 0 ||
                                 words[wordsFound%KHMER_LOOKAHEAD].longestPrefix() < KHMER_PREFIX_COMBINE_THRESHOLD)) {
                     // Look for a plausible word boundary
                     int remaining = rangeEnd - (current + wordLength);
@@ -209,7 +209,7 @@ class KhmerBreakEngine extends DictionaryBreakEngine {
 
             // Look ahead for possible suffixes if a dictionary word does not follow.
             // We do this in code rather than using a rule so that the heuristic
-            // resynch continues to function. For example, one of the suffix characters 
+            // resynch continues to function. For example, one of the suffix characters
             // could be a typo in the middle of a word.
             // NOT CURRENTLY APPLICABLE TO KHMER
 
index ee53adf90b00f1c03d1c661e43814d899c3dc002..d9f13febe75fbb976904b799ac076b8a4ea67fef 100644 (file)
@@ -64,7 +64,6 @@ class LaoBreakEngine extends DictionaryBreakEngine {
     }
 
     public LaoBreakEngine() throws IOException {
-        super(BreakIterator.KIND_WORD, BreakIterator.KIND_LINE);
         setCharacters(fLaoWordSet);
         // Initialize dictionary
         fDictionary = DictionaryData.loadDictionaryFor("Laoo");
index 84717018c707388355d0aff8804c112cb66302b0..07855b1986578fd827d0b08404086fd3fc5e6c01 100644 (file)
@@ -16,7 +16,7 @@ import com.ibm.icu.lang.UProperty;
 import com.ibm.icu.lang.UScript;
 
 class ThaiBreakEngine extends DictionaryBreakEngine {
-    
+
     // Constants for ThaiBreakIterator
     // How many words in a row are "good enough"?
     private static final byte THAI_LOOKAHEAD = 3;
@@ -33,14 +33,14 @@ class ThaiBreakEngine extends DictionaryBreakEngine {
     private static final byte THAI_MIN_WORD = 2;
     // Minimum number of characters for two words
     private static final byte THAI_MIN_WORD_SPAN = THAI_MIN_WORD * 2;
-    
+
     private DictionaryMatcher fDictionary;
     private static UnicodeSet fThaiWordSet;
     private static UnicodeSet fEndWordSet;
     private static UnicodeSet fBeginWordSet;
     private static UnicodeSet fSuffixSet;
     private static UnicodeSet fMarkSet;
-    
+
     static {
         // Initialize UnicodeSets
         fThaiWordSet = new UnicodeSet();
@@ -66,7 +66,7 @@ class ThaiBreakEngine extends DictionaryBreakEngine {
         fEndWordSet.compact();
         fBeginWordSet.compact();
         fSuffixSet.compact();
-        
+
         // Freeze the static UnicodeSet
         fThaiWordSet.freeze();
         fMarkSet.freeze();
@@ -74,32 +74,32 @@ class ThaiBreakEngine extends DictionaryBreakEngine {
         fBeginWordSet.freeze();
         fSuffixSet.freeze();
     }
-    
+
     public ThaiBreakEngine() throws IOException {
-        super(BreakIterator.KIND_WORD, BreakIterator.KIND_LINE);
         setCharacters(fThaiWordSet);
         // Initialize dictionary
         fDictionary = DictionaryData.loadDictionaryFor("Thai");
     }
-    
+
+    @Override
     public boolean equals(Object obj) {
         // Normally is a singleton, but it's possible to have duplicates
         //   during initialization. All are equivalent.
         return obj instanceof ThaiBreakEngine;
     }
 
+    @Override
     public int hashCode() {
         return getClass().hashCode();
     }
-    
-    public boolean handles(int c, int breakType) {
-        if (breakType == BreakIterator.KIND_WORD || breakType == BreakIterator.KIND_LINE) {
-            int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
-            return (script == UScript.THAI);
-        }
-        return false;
+
+    @Override
+    public boolean handles(int c) {
+        int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
+        return (script == UScript.THAI);
     }
 
+    @Override
     public int divideUpDictionaryRange(CharacterIterator fIter, int rangeStart, int rangeEnd,
             DequeI foundBreaks) {
 
@@ -112,7 +112,7 @@ class ThaiBreakEngine extends DictionaryBreakEngine {
         for (int i = 0; i < THAI_LOOKAHEAD; i++) {
             words[i] = new PossibleWord();
         }
-        
+
         int uc;
         fIter.setIndex(rangeStart);
         int current;
@@ -156,7 +156,7 @@ class ThaiBreakEngine extends DictionaryBreakEngine {
                                 }
                             } while (words[(wordsFound+1)%THAI_LOOKAHEAD].backUp(fIter));
                         }
-                    } 
+                    }
                     while (words[wordsFound%THAI_LOOKAHEAD].backUp(fIter));
                     // foundBest: end of loop
                 }
@@ -174,7 +174,7 @@ class ThaiBreakEngine extends DictionaryBreakEngine {
                 // no preceding word, or the non-word shares less than the minimum threshold
                 // of characters with a dictionary word, then scan to resynchronize
                 if (words[wordsFound%THAI_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) <= 0 &&
-                        (wordLength == 0 || 
+                        (wordLength == 0 ||
                                 words[wordsFound%THAI_LOOKAHEAD].longestPrefix() < THAI_PREFIX_COMBINE_THRESHOLD)) {
                     // Look for a plausible word boundary
                     int remaining = rangeEnd - (current + wordLength);
@@ -224,7 +224,7 @@ class ThaiBreakEngine extends DictionaryBreakEngine {
 
             // Look ahead for possible suffixes if a dictionary word does not follow.
             // We do this in code rather than using a rule so that the heuristic
-            // resynch continues to function. For example, one of the suffix characters 
+            // resynch continues to function. For example, one of the suffix characters
             // could be a typo in the middle of a word.
             if (fIter.getIndex() < rangeEnd && wordLength > 0) {
                 if (words[wordsFound%THAI_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) <= 0 &&