ICU-10688 remove break type dependency from dictionaries in break iterators.

author Andy Heninger <andy.heninger@gmail.com>

Mon, 4 Dec 2017 19:27:48 +0000 (19:27 +0000)

committer Andy Heninger <andy.heninger@gmail.com>

Mon, 4 Dec 2017 19:27:48 +0000 (19:27 +0000)
author Andy Heninger <andy.heninger@gmail.com>
Mon, 4 Dec 2017 19:27:48 +0000 (19:27 +0000)
committer Andy Heninger <andy.heninger@gmail.com>
Mon, 4 Dec 2017 19:27:48 +0000 (19:27 +0000)
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/BurmeseBreakEngine.java b/icu4j/main/classes/core/src/com/ibm/icu/text/BurmeseBreakEngine.java

index efae6f7da31596dc9400612cfb247ecd664051ac..6dc682a7d49a597399e6489faedf884c42e3568c 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/BurmeseBreakEngine.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/BurmeseBreakEngine.java
@@ -61,7 +61,6 @@ class BurmeseBreakEngine extends DictionaryBreakEngine {
      }
  
      public BurmeseBreakEngine() throws IOException {
-        super(BreakIterator.KIND_WORD, BreakIterator.KIND_LINE);
          setCharacters(fBurmeseWordSet);
          // Initialize dictionary
          fDictionary = DictionaryData.loadDictionaryFor("Mymr");
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/CjkBreakEngine.java b/icu4j/main/classes/core/src/com/ibm/icu/text/CjkBreakEngine.java

index b2c4c61b7fb25e61f0d330537beeb9c7c248fa7c..9ae2992c66d779cea94d22994a9d7b4c9a0ad131 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/CjkBreakEngine.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/CjkBreakEngine.java
@@ -38,7 +38,6 @@ class CjkBreakEngine extends DictionaryBreakEngine {
      private DictionaryMatcher fDictionary = null;
  
      public CjkBreakEngine(boolean korean) throws IOException {
-        super(BreakIterator.KIND_WORD);
          fDictionary = DictionaryData.loadDictionaryFor("Hira");
          if (korean) {
              setCharacters(fHangulWordSet);
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/DictionaryBreakEngine.java b/icu4j/main/classes/core/src/com/ibm/icu/text/DictionaryBreakEngine.java

index dea25a108b00a256f2fbb04771d6db83ddcea29d..76db7669ea61e5c9a4dcc9ab5501c833100af8c4 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/DictionaryBreakEngine.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/DictionaryBreakEngine.java
@@ -169,16 +169,11 @@ abstract class DictionaryBreakEngine implements LanguageBreakEngine {
      }
  
      UnicodeSet fSet = new UnicodeSet();
-    private BitSet fTypes = new BitSet(32);
  
      /**
-     * @param breakTypes The types of break iterators that can use this engine.
-     *  For example, BreakIterator.KIND_LINE
+     *  Constructor
       */
-    public DictionaryBreakEngine(Integer... breakTypes) {
-        for (Integer type: breakTypes) {
-            fTypes.set(type);
-        }
+    public DictionaryBreakEngine() {
      }
  
      @Override
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/KhmerBreakEngine.java b/icu4j/main/classes/core/src/com/ibm/icu/text/KhmerBreakEngine.java

index 7c8926c982f0cd956df9894260ff586791ef3cf2..f2a3a46cc0f9b572edc05e0be6fc14acdbcc1cf8 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/KhmerBreakEngine.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/KhmerBreakEngine.java
@@ -16,7 +16,7 @@ import com.ibm.icu.lang.UProperty;
  import com.ibm.icu.lang.UScript;
  
  class KhmerBreakEngine extends DictionaryBreakEngine {
-    
+
      // Constants for KhmerBreakIterator
      // How many words in a row are "good enough"?
      private static final byte KHMER_LOOKAHEAD = 3;
@@ -29,14 +29,14 @@ class KhmerBreakEngine extends DictionaryBreakEngine {
      private static final byte KHMER_MIN_WORD = 2;
      // Minimum number of characters for two words
      private static final byte KHMER_MIN_WORD_SPAN = KHMER_MIN_WORD * 2;
-    
-    
+
+
      private DictionaryMatcher fDictionary;
      private static UnicodeSet fKhmerWordSet;
      private static UnicodeSet fEndWordSet;
      private static UnicodeSet fBeginWordSet;
      private static UnicodeSet fMarkSet;
-    
+
      static {
          // Initialize UnicodeSets
          fKhmerWordSet = new UnicodeSet();
@@ -56,42 +56,42 @@ class KhmerBreakEngine extends DictionaryBreakEngine {
          fMarkSet.compact();
          fEndWordSet.compact();
          fBeginWordSet.compact();
-        
+
          // Freeze the static UnicodeSet
          fKhmerWordSet.freeze();
          fMarkSet.freeze();
          fEndWordSet.freeze();
          fBeginWordSet.freeze();
      }
-    
+
      public KhmerBreakEngine() throws IOException {
-        super(BreakIterator.KIND_WORD, BreakIterator.KIND_LINE);
          setCharacters(fKhmerWordSet);
          // Initialize dictionary
          fDictionary = DictionaryData.loadDictionaryFor("Khmr");
      }
  
+    @Override
      public boolean equals(Object obj) {
          // Normally is a singleton, but it's possible to have duplicates
          //   during initialization. All are equivalent.
          return obj instanceof KhmerBreakEngine;
      }
  
+    @Override
      public int hashCode() {
          return getClass().hashCode();
      }
- 
-    public boolean handles(int c, int breakType) {
-        if (breakType == BreakIterator.KIND_WORD || breakType == BreakIterator.KIND_LINE) {
-            int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
-            return (script == UScript.KHMER);
-        }
-        return false;
+
+    @Override
+    public boolean handles(int c) {
+        int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
+        return (script == UScript.KHMER);
      }
  
-    public int divideUpDictionaryRange(CharacterIterator fIter, int rangeStart, int rangeEnd, 
+    @Override
+    public int divideUpDictionaryRange(CharacterIterator fIter, int rangeStart, int rangeEnd,
              DequeI foundBreaks) {
-               
+
          if ((rangeEnd - rangeStart) < KHMER_MIN_WORD_SPAN) {
              return 0;  // Not enough characters for word
          }
@@ -163,7 +163,7 @@ class KhmerBreakEngine extends DictionaryBreakEngine {
                  // no preceding word, or the non-word shares less than the minimum threshold
                  // of characters with a dictionary word, then scan to resynchronize
                  if (words[wordsFound%KHMER_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) <= 0 &&
-                        (wordLength == 0 || 
+                        (wordLength == 0 ||
                                  words[wordsFound%KHMER_LOOKAHEAD].longestPrefix() < KHMER_PREFIX_COMBINE_THRESHOLD)) {
                      // Look for a plausible word boundary
                      int remaining = rangeEnd - (current + wordLength);
@@ -209,7 +209,7 @@ class KhmerBreakEngine extends DictionaryBreakEngine {
  
              // Look ahead for possible suffixes if a dictionary word does not follow.
              // We do this in code rather than using a rule so that the heuristic
-            // resynch continues to function. For example, one of the suffix characters 
+            // resynch continues to function. For example, one of the suffix characters
              // could be a typo in the middle of a word.
              // NOT CURRENTLY APPLICABLE TO KHMER
  
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/LaoBreakEngine.java b/icu4j/main/classes/core/src/com/ibm/icu/text/LaoBreakEngine.java

index ee53adf90b00f1c03d1c661e43814d899c3dc002..d9f13febe75fbb976904b799ac076b8a4ea67fef 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/LaoBreakEngine.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/LaoBreakEngine.java
@@ -64,7 +64,6 @@ class LaoBreakEngine extends DictionaryBreakEngine {
      }
  
      public LaoBreakEngine() throws IOException {
-        super(BreakIterator.KIND_WORD, BreakIterator.KIND_LINE);
          setCharacters(fLaoWordSet);
          // Initialize dictionary
          fDictionary = DictionaryData.loadDictionaryFor("Laoo");
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/ThaiBreakEngine.java b/icu4j/main/classes/core/src/com/ibm/icu/text/ThaiBreakEngine.java

index 84717018c707388355d0aff8804c112cb66302b0..07855b1986578fd827d0b08404086fd3fc5e6c01 100644 (file)
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/ThaiBreakEngine.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/ThaiBreakEngine.java
@@ -16,7 +16,7 @@ import com.ibm.icu.lang.UProperty;
  import com.ibm.icu.lang.UScript;
  
  class ThaiBreakEngine extends DictionaryBreakEngine {
-    
+
      // Constants for ThaiBreakIterator
      // How many words in a row are "good enough"?
      private static final byte THAI_LOOKAHEAD = 3;
@@ -33,14 +33,14 @@ class ThaiBreakEngine extends DictionaryBreakEngine {
      private static final byte THAI_MIN_WORD = 2;
      // Minimum number of characters for two words
      private static final byte THAI_MIN_WORD_SPAN = THAI_MIN_WORD * 2;
-    
+
      private DictionaryMatcher fDictionary;
      private static UnicodeSet fThaiWordSet;
      private static UnicodeSet fEndWordSet;
      private static UnicodeSet fBeginWordSet;
      private static UnicodeSet fSuffixSet;
      private static UnicodeSet fMarkSet;
-    
+
      static {
          // Initialize UnicodeSets
          fThaiWordSet = new UnicodeSet();
@@ -66,7 +66,7 @@ class ThaiBreakEngine extends DictionaryBreakEngine {
          fEndWordSet.compact();
          fBeginWordSet.compact();
          fSuffixSet.compact();
-        
+
          // Freeze the static UnicodeSet
          fThaiWordSet.freeze();
          fMarkSet.freeze();
@@ -74,32 +74,32 @@ class ThaiBreakEngine extends DictionaryBreakEngine {
          fBeginWordSet.freeze();
          fSuffixSet.freeze();
      }
-    
+
      public ThaiBreakEngine() throws IOException {
-        super(BreakIterator.KIND_WORD, BreakIterator.KIND_LINE);
          setCharacters(fThaiWordSet);
          // Initialize dictionary
          fDictionary = DictionaryData.loadDictionaryFor("Thai");
      }
-    
+
+    @Override
      public boolean equals(Object obj) {
          // Normally is a singleton, but it's possible to have duplicates
          //   during initialization. All are equivalent.
          return obj instanceof ThaiBreakEngine;
      }
  
+    @Override
      public int hashCode() {
          return getClass().hashCode();
      }
-    
-    public boolean handles(int c, int breakType) {
-        if (breakType == BreakIterator.KIND_WORD || breakType == BreakIterator.KIND_LINE) {
-            int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
-            return (script == UScript.THAI);
-        }
-        return false;
+
+    @Override
+    public boolean handles(int c) {
+        int script = UCharacter.getIntPropertyValue(c, UProperty.SCRIPT);
+        return (script == UScript.THAI);
      }
  
+    @Override
      public int divideUpDictionaryRange(CharacterIterator fIter, int rangeStart, int rangeEnd,
              DequeI foundBreaks) {
  
@@ -112,7 +112,7 @@ class ThaiBreakEngine extends DictionaryBreakEngine {
          for (int i = 0; i < THAI_LOOKAHEAD; i++) {
              words[i] = new PossibleWord();
          }
-        
+
          int uc;
          fIter.setIndex(rangeStart);
          int current;
@@ -156,7 +156,7 @@ class ThaiBreakEngine extends DictionaryBreakEngine {
                                  }
                              } while (words[(wordsFound+1)%THAI_LOOKAHEAD].backUp(fIter));
                          }
-                    } 
+                    }
                      while (words[wordsFound%THAI_LOOKAHEAD].backUp(fIter));
                      // foundBest: end of loop
                  }
@@ -174,7 +174,7 @@ class ThaiBreakEngine extends DictionaryBreakEngine {
                  // no preceding word, or the non-word shares less than the minimum threshold
                  // of characters with a dictionary word, then scan to resynchronize
                  if (words[wordsFound%THAI_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) <= 0 &&
-                        (wordLength == 0 || 
+                        (wordLength == 0 ||
                                  words[wordsFound%THAI_LOOKAHEAD].longestPrefix() < THAI_PREFIX_COMBINE_THRESHOLD)) {
                      // Look for a plausible word boundary
                      int remaining = rangeEnd - (current + wordLength);
@@ -224,7 +224,7 @@ class ThaiBreakEngine extends DictionaryBreakEngine {
  
              // Look ahead for possible suffixes if a dictionary word does not follow.
              // We do this in code rather than using a rule so that the heuristic
-            // resynch continues to function. For example, one of the suffix characters 
+            // resynch continues to function. For example, one of the suffix characters
              // could be a typo in the middle of a word.
              if (fIter.getIndex() < rangeEnd && wordLength > 0) {
                  if (words[wordsFound%THAI_LOOKAHEAD].candidates(fIter, fDictionary, rangeEnd) <= 0 &&
author	Andy Heninger <andy.heninger@gmail.com>
	Mon, 4 Dec 2017 19:27:48 +0000 (19:27 +0000)
committer	Andy Heninger <andy.heninger@gmail.com>
	Mon, 4 Dec 2017 19:27:48 +0000 (19:27 +0000)
icu4j/main/classes/core/src/com/ibm/icu/text/BurmeseBreakEngine.java		patch | blob | history
icu4j/main/classes/core/src/com/ibm/icu/text/CjkBreakEngine.java		patch | blob | history
icu4j/main/classes/core/src/com/ibm/icu/text/DictionaryBreakEngine.java		patch | blob | history
icu4j/main/classes/core/src/com/ibm/icu/text/KhmerBreakEngine.java		patch | blob | history
icu4j/main/classes/core/src/com/ibm/icu/text/LaoBreakEngine.java		patch | blob | history
icu4j/main/classes/core/src/com/ibm/icu/text/ThaiBreakEngine.java		patch | blob | history