granicus.if.org Git - icu/commitdiff
ICU-9353 Do not generate surrogates in the Monkey test so as to avoid generating...
author Maxime Serrano <mserrano@svn.icu-project.org>
Fri, 17 Aug 2012 21:27:08 +0000 (21:27 +0000)
committer Maxime Serrano <mserrano@svn.icu-project.org>
Fri, 17 Aug 2012 21:27:08 +0000 (21:27 +0000)
X-SVN-Rev: 32195

icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java
icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java

index 787f76ca7c1789f563ab811b271683c4682474ca..4ff719874318954f3f04cd92ecc299f463d29f8d 100644 (file)
@@ -1036,12 +1036,21 @@ public class RuleBasedBreakIterator extends BreakIterator {
             case UScript.KATAKANA:
             case UScript.HIRAGANA:
             case UScript.HAN:
-                if (getBreakType() == KIND_WORD)
+                if (getBreakType() == KIND_WORD) {
                     eng = new CjkBreakEngine(false);
+                }
+                else {
+                    fUnhandledBreakEngine.handleChar(c, getBreakType());
+                    eng = fUnhandledBreakEngine;
+                }
                 break;
             case UScript.HANGUL:
-                if (getBreakType() == KIND_WORD)
+                if (getBreakType() == KIND_WORD) {
                     eng = new CjkBreakEngine(true);
+                } else {
+                    fUnhandledBreakEngine.handleChar(c, getBreakType());
+                    eng = fUnhandledBreakEngine;
+                }
                 break;
             default:
                 fUnhandledBreakEngine.handleChar(c, getBreakType());
index 6b3c1308bd37c0a43cda026877d4a342647b24f5..9e3719b5348f1d9f201a6cd7747ee0951358fd92 100644 (file)
@@ -300,7 +300,8 @@ public class RBBITestMonkey extends TestFmwk {
             fOtherSet.removeAll(fExtendSet);
             fOtherSet.removeAll(fExtendNumLetSet);
             // Inhibit dictionary characters from being tested at all.
-            fOtherSet.removeAll(new UnicodeSet("[\\p{LineBreak = Complex_Context}]"));
+            // remove surrogates so as to not generate higher CJK characters
+            fOtherSet.removeAll(new UnicodeSet("[[\\p{LineBreak = Complex_Context}][:Line_Break=Surrogate:]]"));
             fOtherSet.removeAll(fDictionaryCjkSet);
 
             fSets            = new ArrayList();
@@ -613,7 +614,6 @@ public class RBBITestMonkey extends TestFmwk {
             fSets.add(fWJ);
             fSets.add(fSA);
             fSets.add(fSG);
-            
         }
         
         void setText(StringBuffer s) {