ICU-21006 Improve output from RBBI Monkey Test in ICU4J

author Frank Tang <ftang@chromium.org>

Mon, 9 Mar 2020 16:36:52 +0000 (16:36 +0000)

committer Frank Yung-Fong Tang <41213225+FrankYFTang@users.noreply.github.com>

Tue, 10 Mar 2020 03:17:00 +0000 (20:17 -0700)
author Frank Tang <ftang@chromium.org>
Mon, 9 Mar 2020 16:36:52 +0000 (16:36 +0000)
committer Frank Yung-Fong Tang <41213225+FrankYFTang@users.noreply.github.com>
Tue, 10 Mar 2020 03:17:00 +0000 (20:17 -0700)
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java

index 39c0e19d01286fa2a402fa591f5dcf0cfa63e05e..10e05220f6ac896c8118defe7a3de7bd7f953152 100644 (file)
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java
@@ -54,6 +54,11 @@ public class RBBITestMonkey extends TestFmwk {
      //        testing, but works purely in terms of the interface defined here.
      //
      abstract static class RBBIMonkeyKind {
+        RBBIMonkeyKind() {
+            fSets = new  ArrayList();
+            fClassNames = new ArrayList();
+            fAppliedRules = new ArrayList();
+        }
  
          // Return a List of UnicodeSets, representing the character classes used
          //   for this type of iterator.
@@ -66,10 +71,59 @@ public class RBBITestMonkey extends TestFmwk {
          // Return -1 after reaching end of string.
          abstract   int   next(int i);
  
+        // Name of each character class, parallel with charClasses. Used for debugging output
+        // of characters.
+        List<String> characterClassNames() {
+            return fClassNames;
+        }
+
+        void setAppliedRule(int position, String value) {
+            fAppliedRules.set(position, value);
+        }
+
+        String getAppliedRule(int position) {
+            return fAppliedRules.get(position);
+        }
+
+        String classNameFromCodepoint(int c) {
+            // Simply iterate through fSets to find character's class
+            for (int aClassNum = 0; aClassNum < charClasses().size(); aClassNum++) {
+                UnicodeSet classSet = (UnicodeSet)charClasses().get(aClassNum);
+                if (classSet.contains(c)) {
+                    return fClassNames.get(aClassNum);
+                }
+            }
+            return "bad class name";
+        }
+
+        int maxClassNameSize() {
+            int maxSize = 0;
+            for (int aClassNum = 0; aClassNum < charClasses().size(); aClassNum++) {
+                if (fClassNames.get(aClassNum).length() > maxSize) {
+                    maxSize = fClassNames.get(aClassNum).length();
+                }
+            }
+            return maxSize;
+        }
+
+        // Clear `appliedRules` and fill it with empty strings in the size of test text.
+        void prepareAppliedRules(int size) {
+            // Remove all the information in the `appliedRules`.
+            fAppliedRules.clear();
+            fAppliedRules.ensureCapacity(size + 1);
+            while (fAppliedRules.size() < size + 1) {
+                fAppliedRules.add("");
+            }
+        }
+
          // A Character Property, one of the constants defined in class UProperty.
          //   The value of this property will be displayed for the characters
          //    near any test failure.
          int   fCharProperty;
+
+        List fSets;
+        ArrayList<String> fClassNames;
+        ArrayList<String> fAppliedRules;
      }
  
      /**
@@ -77,8 +131,6 @@ public class RBBITestMonkey extends TestFmwk {
       * Note: As of Unicode 6.1, fPrependSet is empty, so don't add it to fSets
       */
      static class RBBICharMonkey extends RBBIMonkeyKind {
-        List                      fSets;
-
          UnicodeSet                fCRLFSet;
          UnicodeSet                fControlSet;
          UnicodeSet                fExtendSet;
@@ -101,7 +153,6 @@ public class RBBITestMonkey extends TestFmwk {
  
          StringBuffer              fText;
  
-
          RBBICharMonkey() {
              fText       = null;
              fCharProperty = UProperty.GRAPHEME_CLUSTER_BREAK;
@@ -133,28 +184,28 @@ public class RBBITestMonkey extends TestFmwk {
              fAnySet           = new UnicodeSet("[\\u0000-\\U0010ffff]");
  
  
-            fSets       = new ArrayList();
-            fSets.add(fCRLFSet);
-            fSets.add(fControlSet);
-            fSets.add(fExtendSet);
-            fSets.add(fRegionalIndicatorSet);
+            fSets.add(fCRLFSet);               fClassNames.add("CRLF");
+            fSets.add(fControlSet);            fClassNames.add("Control");
+            fSets.add(fExtendSet);             fClassNames.add("Extended");
+            fSets.add(fRegionalIndicatorSet);  fClassNames.add("RegionalIndicator");
              if (!fPrependSet.isEmpty()) {
-                fSets.add(fPrependSet);
+                fSets.add(fPrependSet);        fClassNames.add("Prepend");
              }
-            fSets.add(fSpacingSet);
-            fSets.add(fHangulSet);
-            fSets.add(fAnySet);
-            fSets.add(fZWJSet);
-            fSets.add(fExtendedPictSet);
-            fSets.add(fViramaSet);
-            fSets.add(fLinkingConsonantSet);
-            fSets.add(fExtCccZwjSet);
+            fSets.add(fSpacingSet);            fClassNames.add("Spacing");
+            fSets.add(fHangulSet);             fClassNames.add("Hangul");
+            fSets.add(fAnySet);                fClassNames.add("Any");
+            fSets.add(fZWJSet);                fClassNames.add("ZWJ");
+            fSets.add(fExtendedPictSet);       fClassNames.add("ExtendedPict");
+            fSets.add(fViramaSet);             fClassNames.add("Virama");
+            fSets.add(fLinkingConsonantSet);   fClassNames.add("LinkingConsonant");
+            fSets.add(fExtCccZwjSet);          fClassNames.add("ExtCccZwj");
          }
  
  
          @Override
          void setText(StringBuffer s) {
              fText = s;
+            prepareAppliedRules(s.length());
          }
  
          @Override
@@ -197,74 +248,72 @@ public class RBBITestMonkey extends TestFmwk {
                      continue;
                  }
                  if (p2 == fText.length()) {
-                    // Reached end of string.  Always a break position.
+                    setAppliedRule(p2, "End of String");
                      break;
                  }
  
-                // Rule  GB3   CR x LF
                  //     No Extend or Format characters may appear between the CR and LF,
                  //     which requires the additional check for p2 immediately following p1.
                  //
                  if (c1==0x0D && c2==0x0A && p1==(p2-1)) {
+                    setAppliedRule(p2, "GB 3   CR x LF");
                      continue;
                  }
  
-                // Rule (GB4).   ( Control | CR | LF ) <break>
                  if (fControlSet.contains(c1) ||
                          c1 == 0x0D ||
                          c1 == 0x0A)  {
+                    setAppliedRule(p2, "GB 4   ( Control | CR | LF ) <break>");
                      break;
                  }
  
-                // Rule (GB5)    <break>  ( Control | CR | LF )
-                //
                  if (fControlSet.contains(c2) ||
                          c2 == 0x0D ||
                          c2 == 0x0A)  {
+                    setAppliedRule(p2, "GB 5   <break>  ( Control | CR | LF )");
                      break;
                  }
  
  
-                // Rule (GB6)  L x ( L | V | LV | LVT )
                  if (fLSet.contains(c1) &&
                          (fLSet.contains(c2)  ||
                                  fVSet.contains(c2)  ||
                                  fLVSet.contains(c2) ||
                                  fLVTSet.contains(c2))) {
+                    setAppliedRule(p2, "GB 6   L x ( L | V | LV | LVT )");
                      continue;
                  }
  
-                // Rule (GB7)    ( LV | V )  x  ( V | T )
                  if ((fLVSet.contains(c1) || fVSet.contains(c1)) &&
                          (fVSet.contains(c2) || fTSet.contains(c2)))  {
+                    setAppliedRule(p2, "GB 7   ( LV | V )  x  ( V | T )");
                      continue;
                  }
  
-                // Rule (GB8)    ( LVT | T)  x T
                  if ((fLVTSet.contains(c1) || fTSet.contains(c1)) &&
                          fTSet.contains(c2))  {
+                    setAppliedRule(p2, "GB 8   ( LVT | T)  x T");
                      continue;
                  }
  
-                // Rule (GB9)    x (Extend | ZWJ)
                  if (fExtendSet.contains(c2) || fZWJSet.contains(c2))  {
                      if (!fExtendSet.contains(c1)) {
                          cBase = c1;
                      }
+                    setAppliedRule(p2, "GB 9   x (Extend | ZWJ)");
                      continue;
                  }
  
-                // Rule (GB9a)   x  SpacingMark
                  if (fSpacingSet.contains(c2)) {
+                    setAppliedRule(p2, "GB 9a  x  SpacingMark");
                      continue;
                  }
  
-                // Rule (GB9b)   Prepend x
                  if (fPrependSet.contains(c1)) {
+                    setAppliedRule(p2, "GB 9b  Prepend x");
                      continue;
                  }
  
-                // Rule (GB9.3)  LinkingConsonant ExtCccZwj* Virama ExtCccZwj* × LinkingConsonant
                  //   Note: Viramas are also included in the ExtCccZwj class.
                  if (fLinkingConsonantSet.contains(c2)) {
                      int pi = p1;
@@ -276,37 +325,39 @@ public class RBBITestMonkey extends TestFmwk {
                          pi = fText.offsetByCodePoints(pi, -1);
                      }
                      if (sawVirama && fLinkingConsonantSet.contains(fText.codePointAt(pi))) {
+                        setAppliedRule(p2, "GB 9.3 LinkingConsonant ExtCccZwj* Virama ExtCccZwj* × LinkingConsonant");
                          continue;
                      }
                  }
  
-                // Rule (GB11)   Extended_Pictographic ZWJ x Extended_Pictographic
                  if (fExtendedPictSet.contains(cBase) && fZWJSet.contains(c1) && fExtendedPictSet.contains(c2) ) {
+                    setAppliedRule(p2, "GB 11  Extended_Pictographic Extend * ZWJ x Extended_Pictographic");
                      continue;
                  }
  
-                // Rule (GB12-13)   Regional_Indicator x Regional_Indicator
                  //                  Note: The first if condition is a little tricky. We only need to force
                  //                      a break if there are three or more contiguous RIs. If there are
                  //                      only two, a break following will occur via other rules, and will include
                  //                      any trailing extend characters, which is needed behavior.
                  if (fRegionalIndicatorSet.contains(c0) && fRegionalIndicatorSet.contains(c1)
                          && fRegionalIndicatorSet.contains(c2)) {
+                    setAppliedRule(p2, "GB 12-13 Regional_Indicator x Regional_Indicator");
                      break;
                  }
                  if (fRegionalIndicatorSet.contains(c1) && fRegionalIndicatorSet.contains(c2)) {
+                    setAppliedRule(p2, "GB 12-13 Regional_Indicator x Regional_Indicator");
                      continue;
                  }
  
-                // Rule (GB999)  Any  <break>  Any
+                setAppliedRule(p2, "GB 999 Any <break> Any");
                  break;
              }
  
              breakPos = p2;
              return breakPos;
          }
-    }
  
+    }
  
      /**
       *
@@ -316,7 +367,6 @@ public class RBBITestMonkey extends TestFmwk {
       *
       */
      static class RBBIWordMonkey extends RBBIMonkeyKind {
-        List                      fSets;
          StringBuffer              fText;
  
          UnicodeSet                fCRSet;
@@ -341,7 +391,6 @@ public class RBBITestMonkey extends TestFmwk {
          UnicodeSet                fZWJSet;
          UnicodeSet                fExtendedPictSet;
  
-
          RBBIWordMonkey() {
              fCharProperty    = UProperty.WORD_BREAK;
  
@@ -402,30 +451,28 @@ public class RBBITestMonkey extends TestFmwk {
              fOtherSet.removeAll(new UnicodeSet("[[\\p{LineBreak = Complex_Context}][:Line_Break=Surrogate:]]"));
              fOtherSet.removeAll(fDictionarySet);
  
-            fSets            = new ArrayList();
-            fSets.add(fCRSet);
-            fSets.add(fLFSet);
-            fSets.add(fNewlineSet);
-            fSets.add(fRegionalIndicatorSet);
-            fSets.add(fHebrew_LetterSet);
-            fSets.add(fALetterSet);
+            fSets.add(fCRSet);                    fClassNames.add("CR");
+            fSets.add(fLFSet);                    fClassNames.add("LF");
+            fSets.add(fNewlineSet);               fClassNames.add("Newline");
+            fSets.add(fRegionalIndicatorSet);     fClassNames.add("RegionalIndicator");
+            fSets.add(fHebrew_LetterSet);         fClassNames.add("Hebrew");
+            fSets.add(fALetterSet);               fClassNames.add("ALetter");
              //fSets.add(fKatakanaSet);  // Omit Katakana from fSets, which omits Katakana characters
              // from the test data. They are all in the dictionary set,
              // which this (old, to be retired) monkey test cannot handle.
-            fSets.add(fSingle_QuoteSet);
-            fSets.add(fDouble_QuoteSet);
-            fSets.add(fMidLetterSet);
-            fSets.add(fMidNumLetSet);
-            fSets.add(fMidNumSet);
-            fSets.add(fNumericSet);
-            fSets.add(fFormatSet);
-            fSets.add(fExtendSet);
-            fSets.add(fExtendNumLetSet);
-            fSets.add(fWSegSpaceSet);
-            fSets.add(fRegionalIndicatorSet);
-            fSets.add(fZWJSet);
-            fSets.add(fExtendedPictSet);
-            fSets.add(fOtherSet);
+            fSets.add(fSingle_QuoteSet);          fClassNames.add("Single Quote");
+            fSets.add(fDouble_QuoteSet);          fClassNames.add("Double Quote");
+            fSets.add(fMidLetterSet);             fClassNames.add("MidLetter");
+            fSets.add(fMidNumLetSet);             fClassNames.add("MidNumLet");
+            fSets.add(fMidNumSet);                fClassNames.add("MidNum");
+            fSets.add(fNumericSet);               fClassNames.add("Numeric");
+            fSets.add(fFormatSet);                fClassNames.add("Format");
+            fSets.add(fExtendSet);                fClassNames.add("Extend");
+            fSets.add(fExtendNumLetSet);          fClassNames.add("ExtendNumLet");
+            fSets.add(fWSegSpaceSet);             fClassNames.add("WSegSpace");
+            fSets.add(fZWJSet);                   fClassNames.add("ZWJ");
+            fSets.add(fExtendedPictSet);          fClassNames.add("ExtendedPict");
+            fSets.add(fOtherSet);                 fClassNames.add("Other");
          }
  
  
@@ -437,6 +484,7 @@ public class RBBITestMonkey extends TestFmwk {
          @Override
          void   setText(StringBuffer s) {
              fText = s;
+            prepareAppliedRules(s.length());
          }
  
          @Override
@@ -489,149 +537,146 @@ public class RBBITestMonkey extends TestFmwk {
                      break;
                  }
  
-                // Rule (3)   CR x LF
                  //     No Extend or Format characters may appear between the CR and LF,
                  //     which requires the additional check for p2 immediately following p1.
                  //
                  if (c1==0x0D && c2==0x0A) {
+                    setAppliedRule(p2, "WB 3   CR x LF");
                      continue;
                  }
  
-                // Rule (3a)  Break before and after newlines (including CR and LF)
                  //
                  if (fCRSet.contains(c1) || fLFSet.contains(c1) || fNewlineSet.contains(c1)) {
+                    setAppliedRule(p2, "WB 3a  Break before and after newlines (including CR and LF)");
                      break;
                  }
                  if (fCRSet.contains(c2) || fLFSet.contains(c2) || fNewlineSet.contains(c2)) {
+                    setAppliedRule(p2, "WB 3a  Break before and after newlines (including CR and LF)");
                      break;
                  }
  
-                // Rule (3c)    ZWJ x Extended_Pictographic.
                  //              Not ignoring extend chars, so peek into input text to
                  //              get the potential ZWJ, the character immediately preceding c2.
                  if (fZWJSet.contains(fText.codePointBefore(p2)) && fExtendedPictSet.contains(c2)) {
+                    setAppliedRule(p2, "WB 3c  ZWJ x Extended_Pictographic");
                      continue;
                  }
  
-                // Rule (3d)    Keep horizontal whitespace together.
                  if (fWSegSpaceSet.contains(fText.codePointBefore(p2)) && fWSegSpaceSet.contains(c2)) {
+                    setAppliedRule(p2, "WB 3d  Keep horizontal whitespace together");
                      continue;
                  }
  
-                // Rule (5).   (ALetter | Hebrew_Letter) x (ALetter | Hebrew_Letter)
                  if ((fALetterSet.contains(c1) || fHebrew_LetterSet.contains(c1)) &&
                          (fALetterSet.contains(c2) || fHebrew_LetterSet.contains(c2)))  {
+                    setAppliedRule(p2, "WB 4   (ALetter | Hebrew_Letter) x (ALetter | Hebrew_Letter)");
                      continue;
                  }
  
-                // Rule (6)  (ALetter | Hebrew_Letter)  x  (MidLetter | MidNumLet | Single_Quote) (ALetter | Hebrew_Letter)
-                //
                  if ( (fALetterSet.contains(c1) || fHebrew_LetterSet.contains(c1))   &&
                          (fMidLetterSet.contains(c2) || fMidNumLetSet.contains(c2) || fSingle_QuoteSet.contains(c2)) &&
                          (setContains(fALetterSet, c3) || setContains(fHebrew_LetterSet, c3))) {
+                    setAppliedRule(p2, "WB 6   (ALetter | Hebrew_Letter)  x  (MidLetter | MidNumLet | Single_Quote) (ALetter | Hebrew_Letter)");
                      continue;
                  }
  
-                // Rule (7)  (ALetter | Hebrew_Letter) (MidLetter | MidNumLet | Single_Quote)  x  (ALetter | Hebrew_Letter)
                  if ((fALetterSet.contains(c0) || fHebrew_LetterSet.contains(c0)) &&
                          (fMidLetterSet.contains(c1) || fMidNumLetSet.contains(c1) || fSingle_QuoteSet.contains(c1)) &&
                          (fALetterSet.contains(c2) || fHebrew_LetterSet.contains(c2))) {
+                    setAppliedRule(p2, "WB 7   (ALetter | Hebrew_Letter) (MidLetter | MidNumLet | Single_Quote)  x  (ALetter | Hebrew_Letter)");
                      continue;
                  }
  
-                // Rule (7a)     Hebrew_Letter x Single_Quote
                  if (fHebrew_LetterSet.contains(c1) && fSingle_QuoteSet.contains(c2)) {
+                    setAppliedRule(p2, "WB 7a  Hebrew_Letter x Single_Quote");
                      continue;
                  }
  
-                // Rule (7b)    Hebrew_Letter x Double_Quote Hebrew_Letter
                  if (fHebrew_LetterSet.contains(c1) && fDouble_QuoteSet.contains(c2) && setContains(fHebrew_LetterSet,c3)) {
+                    setAppliedRule(p2, "WB 7b  Hebrew_Letter x Single_Quote");
                      continue;
                  }
  
-                // Rule (7c)    Hebrew_Letter Double_Quote x Hebrew_Letter
                  if (fHebrew_LetterSet.contains(c0) && fDouble_QuoteSet.contains(c1) && fHebrew_LetterSet.contains(c2)) {
+                    setAppliedRule(p2, "WB 7c  Hebrew_Letter Double_Quote x Hebrew_Letter");
                      continue;
                  }
  
-                //  Rule (8)    Numeric x Numeric
                  if (fNumericSet.contains(c1) &&
                          fNumericSet.contains(c2))  {
+                    setAppliedRule(p2, "WB 8   Numeric x Numeric");
                      continue;
                  }
  
-                // Rule (9)    (ALetter | Hebrew_Letter) x Numeric
                  if ((fALetterSet.contains(c1) || fHebrew_LetterSet.contains(c1)) &&
                          fNumericSet.contains(c2))  {
+                    setAppliedRule(p2, "WB 9   (ALetter | Hebrew_Letter) x Numeric");
                      continue;
                  }
  
-                // Rule (10)    Numeric x (ALetter | Hebrew_Letter)
                  if (fNumericSet.contains(c1) &&
                          (fALetterSet.contains(c2) || fHebrew_LetterSet.contains(c2)))  {
+                    setAppliedRule(p2, "WB 10  Numeric x (ALetter | Hebrew_Letter)");
                      continue;
                  }
  
-                // Rule (11)   Numeric (MidNum | MidNumLet | Single_Quote)  x  Numeric
                  if (fNumericSet.contains(c0) &&
                          (fMidNumSet.contains(c1) || fMidNumLetSet.contains(c1) || fSingle_QuoteSet.contains(c1))  &&
                          fNumericSet.contains(c2)) {
+                    setAppliedRule(p2, "WB 11  Numeric (MidNum | MidNumLet | Single_Quote)  x  Numeric");
                      continue;
                  }
  
-                // Rule (12)  Numeric x (MidNum | MidNumLet | SingleQuote) Numeric
                  if (fNumericSet.contains(c1) &&
                          (fMidNumSet.contains(c2) || fMidNumLetSet.contains(c2) || fSingle_QuoteSet.contains(c2))  &&
                          setContains(fNumericSet, c3)) {
+                    setAppliedRule(p2, "WB 12  Numeric x (MidNum | MidNumLet | SingleQuote) Numeric");
                      continue;
                  }
  
-                // Rule (13)  Katakana x Katakana
                  //            Note: matches UAX 29 rules, but doesn't come into play for ICU because
                  //                  all Katakana are handled by the dictionary breaker.
                  if (fKatakanaSet.contains(c1) &&
                          fKatakanaSet.contains(c2))  {
+                    setAppliedRule(p2, "WB 13  Katakana x Katakana");
                      continue;
                  }
  
-                // Rule 13a    (ALetter | Hebrew_Letter | Numeric | KataKana | ExtendNumLet) x ExtendNumLet
                  if ((fALetterSet.contains(c1) || fHebrew_LetterSet.contains(c1) ||fNumericSet.contains(c1) ||
                          fKatakanaSet.contains(c1) || fExtendNumLetSet.contains(c1)) &&
                          fExtendNumLetSet.contains(c2)) {
+                    setAppliedRule(p2, "WB 13a (ALetter | Hebrew_Letter | Numeric | KataKana | ExtendNumLet) x ExtendNumLet");
                      continue;
                  }
  
-                // Rule 13b   ExtendNumLet x (ALetter | Hebrew_Letter | Numeric | Katakana)
                  if (fExtendNumLetSet.contains(c1) &&
                          (fALetterSet.contains(c2) || fHebrew_LetterSet.contains(c2) ||
                                  fNumericSet.contains(c2) || fKatakanaSet.contains(c2)))  {
+                    setAppliedRule(p2, "WB 13b ExtendNumLet x (ALetter | Hebrew_Letter | Numeric | Katakana)");
                      continue;
                  }
  
  
-                // Rule 15 - 17   Group piars of Regional Indicators
                  if (fRegionalIndicatorSet.contains(c0) && fRegionalIndicatorSet.contains(c1)) {
+                    setAppliedRule(p2, "WB 15-17 Group pairs of Regional Indicators.");
                      break;
                  }
                  if (fRegionalIndicatorSet.contains(c1) && fRegionalIndicatorSet.contains(c2)) {
+                    setAppliedRule(p2, "WB 15-17 Group pairs of Regional Indicators.");
                      continue;
                  }
  
-                // Rule 999.  Break found here.
+                setAppliedRule(p2, "WB 999");
                  break;
              }
  
              breakPos = p2;
              return breakPos;
          }
-
      }
  
  
      static class RBBILineMonkey extends RBBIMonkeyKind {
-
-        List        fSets;
-
          // UnicodeSets for each of the Line Breaking character classes.
          // Order matches that of Unicode UAX 14, Table 1, which makes it a little easier
          // to verify that they are all accounted for.
@@ -685,7 +730,6 @@ public class RBBITestMonkey extends TestFmwk {
          StringBuffer  fText;
          int           fOrigPositions;
  
-
          // XUnicodeSet is like UnicodeSet, except that the method contains(int codePoint) does not
          // throw exceptions on out-of-range codePoints. This matches ICU4C behavior.
          // The LineMonkey test (ported from ICU4C) relies on this behavior, it uses a value of -1
@@ -705,7 +749,6 @@ public class RBBITestMonkey extends TestFmwk {
          RBBILineMonkey()
          {
              fCharProperty  = UProperty.LINE_BREAK;
-            fSets          = new ArrayList();
  
              fBK    = new XUnicodeSet("[\\p{Line_Break=BK}]");
              fCR    = new XUnicodeSet("[\\p{Line_break=CR}]");
@@ -769,55 +812,56 @@ public class RBBITestMonkey extends TestFmwk {
  
              fHH.add('\u2010');   // Hyphen, '‐'
  
-            fSets.add(fBK);
-            fSets.add(fCR);
-            fSets.add(fLF);
-            fSets.add(fCM);
-            fSets.add(fNL);
-            fSets.add(fWJ);
-            fSets.add(fZW);
-            fSets.add(fGL);
-            fSets.add(fSP);
-            fSets.add(fB2);
-            fSets.add(fBA);
-            fSets.add(fBB);
-            fSets.add(fHY);
-            fSets.add(fCB);
-            fSets.add(fCL);
-            fSets.add(fCP);
-            fSets.add(fEX);
-            fSets.add(fIN);
-            fSets.add(fJL);
-            fSets.add(fJT);
-            fSets.add(fJV);
-            fSets.add(fNS);
-            fSets.add(fOP);
-            fSets.add(fQU);
-            fSets.add(fIS);
-            fSets.add(fNU);
-            fSets.add(fPO);
-            fSets.add(fPR);
-            fSets.add(fSY);
-            fSets.add(fAI);
-            fSets.add(fAL);
-            fSets.add(fH2);
-            fSets.add(fH3);
-            fSets.add(fHL);
-            fSets.add(fID);
-            fSets.add(fWJ);
-            fSets.add(fRI);
-            fSets.add(fSG);
-            fSets.add(fEB);
-            fSets.add(fEM);
-            fSets.add(fZWJ);
+            fSets.add(fBK);     fClassNames.add("BK");
+            fSets.add(fCR);     fClassNames.add("CR");
+            fSets.add(fLF);     fClassNames.add("LF");
+            fSets.add(fCM);     fClassNames.add("CM");
+            fSets.add(fNL);     fClassNames.add("NL");
+            fSets.add(fWJ);     fClassNames.add("WJ");
+            fSets.add(fZW);     fClassNames.add("ZW");
+            fSets.add(fGL);     fClassNames.add("GL");
+            fSets.add(fSP);     fClassNames.add("SP");
+            fSets.add(fB2);     fClassNames.add("B2");
+            fSets.add(fBA);     fClassNames.add("BA");
+            fSets.add(fBB);     fClassNames.add("BB");
+            fSets.add(fHY);     fClassNames.add("HY");
+            fSets.add(fCB);     fClassNames.add("CB");
+            fSets.add(fCL);     fClassNames.add("CL");
+            fSets.add(fCP);     fClassNames.add("CP");
+            fSets.add(fEX);     fClassNames.add("EX");
+            fSets.add(fIN);     fClassNames.add("IN");
+            fSets.add(fJL);     fClassNames.add("JL");
+            fSets.add(fJT);     fClassNames.add("JT");
+            fSets.add(fJV);     fClassNames.add("JV");
+            fSets.add(fNS);     fClassNames.add("NV");
+            fSets.add(fOP);     fClassNames.add("OP");
+            fSets.add(fQU);     fClassNames.add("QU");
+            fSets.add(fIS);     fClassNames.add("IS");
+            fSets.add(fNU);     fClassNames.add("NU");
+            fSets.add(fPO);     fClassNames.add("PO");
+            fSets.add(fPR);     fClassNames.add("PR");
+            fSets.add(fSY);     fClassNames.add("SY");
+            fSets.add(fAI);     fClassNames.add("AI");
+            fSets.add(fAL);     fClassNames.add("AL");
+            fSets.add(fH2);     fClassNames.add("H2");
+            fSets.add(fH3);     fClassNames.add("H3");
+            fSets.add(fHL);     fClassNames.add("HL");
+            fSets.add(fID);     fClassNames.add("ID");
+            fSets.add(fWJ);     fClassNames.add("WJ");
+            fSets.add(fRI);     fClassNames.add("RI");
+            fSets.add(fSG);     fClassNames.add("SG");
+            fSets.add(fEB);     fClassNames.add("EB");
+            fSets.add(fEM);     fClassNames.add("EM");
+            fSets.add(fZWJ);    fClassNames.add("ZWJ");
              // TODO: fOP30 & fCP30 overlap with plain fOP. Probably OK, but fOP/CP chars will be over-represented.
-            fSets.add(fOP30);
-            fSets.add(fCP30);
+            fSets.add(fOP30);   fClassNames.add("OP30");
+            fSets.add(fCP30);   fClassNames.add("CP30");
          }
  
          @Override
          void setText(StringBuffer s) {
              fText       = s;
+            prepareAppliedRules(s.length());
          }
  
  
@@ -869,12 +913,11 @@ public class RBBITestMonkey extends TestFmwk {
                  pos       = nextPos;
                  nextPos   = moveIndex32(fText, pos, 1);
  
-                // Rule LB2 - Break at end of text.
                  if (pos >= fText.length()) {
+                    setAppliedRule(pos, "LB 2   Break at end of text");
                      break;
                  }
  
-                // Rule LB 9 - adjust for combining sequences.
                  //             We do this rule out-of-order because the adjustment does
                  //             not effect the way that rules LB 3 through LB 6 match,
                  //             and doing it here rather than after LB 6 is substantially
@@ -912,41 +955,43 @@ public class RBBITestMonkey extends TestFmwk {
                  //   -1 positions out of prevPos yet - loop back to advance the
                  //    position in the input without any further looking for breaks.
                  if (prevPos == -1) {
+                    setAppliedRule(pos, "LB 9   adjust for combining sequences.");
                      continue;
                  }
  
-                // LB 4  Always break after hard line breaks,
                  if (fBK.contains(prevChar)) {
+                    setAppliedRule(pos, "LB 4   Always break after hard line breaks");
                      break;
                  }
  
-                // LB 5  Break after CR, LF, NL, but not inside CR LF
                  if (fCR.contains(prevChar) && fLF.contains(thisChar)) {
+                    setAppliedRule(pos, "LB 5   Break after CR, LF, NL, but not inside CR LF");
                      continue;
                  }
                  if  (fCR.contains(prevChar) ||
                          fLF.contains(prevChar) ||
                          fNL.contains(prevChar))  {
+                    setAppliedRule(pos, "LB 5   Break after CR, LF, NL, but not inside CR LF");
                      break;
                  }
  
-                // LB 6  Don't break before hard line breaks
                  if (fBK.contains(thisChar) || fCR.contains(thisChar) ||
                          fLF.contains(thisChar) || fNL.contains(thisChar) ) {
+                    setAppliedRule(pos, "LB 6   Don't break before hard line breaks");
                      continue;
                  }
  
  
-                // LB 7  Don't break before spaces or zero-width space.
                  if (fSP.contains(thisChar)) {
+                    setAppliedRule(pos, "LB 7   Don't break before spaces or zero-width space");
                      continue;
                  }
  
                  if (fZW.contains(thisChar)) {
+                    setAppliedRule(pos, "LB 7   Don't break before spaces or zero-width space");
                      continue;
                  }
  
-                // LB 8  Break after zero width space
                  //       ZW SP* ÷
                  //       Scan backwards from prevChar for SP* ZW
                  tPos = prevPos;
@@ -954,10 +999,10 @@ public class RBBITestMonkey extends TestFmwk {
                      tPos = moveIndex32(fText, tPos, -1);
                  }
                  if (fZW.contains(UTF16.charAt(fText, tPos))) {
+                    setAppliedRule(pos, "LB 8   Break after zero width space");
                      break;
                  }
  
-                // LB 25    Numbers
                  //          Move this test up, before LB8a, because numbers can match a longer sequence that would
                  //          also match 8a.  e.g. NU ZWJ IS PO     (ZWJ acts like CM)
                  matchVals = LBNumberCheck(fText, prevPos, matchVals);
@@ -979,57 +1024,53 @@ public class RBBITestMonkey extends TestFmwk {
                              }
                              while (fCM.contains(thisChar));
                          }
+                        setAppliedRule(pos, "LB 25  Numbers");
                          continue;
                      }
                  }
  
-                // LB 8a:  ZWJ x (ID | Extended_Pictographic | Emoji)
                  //       The monkey test's way of ignoring combining characters doesn't work
                  //       for this rule. ZWJ is also a CM. Need to get the actual character
                  //       preceding "thisChar", not ignoring combining marks, possibly ZWJ.
                  {
                      int prevC = fText.codePointBefore(pos);
                      if (fZWJ.contains(prevC)) {
+                        setAppliedRule(pos, "LB 8a  ZWJ x");
                          continue;
                      }
                  }
  
-                //  LB 9, 10  Already done, at top of loop.
-                //
+                // appliedRule: "LB 9, 10"; //  Already done, at top of loop.";
  
  
-                // LB 11
                  //    x  WJ
                  //    WJ  x
                  if (fWJ.contains(thisChar) || fWJ.contains(prevChar)) {
+                    setAppliedRule(pos, "LB 11  Do not break before or after WORD JOINER and related characters.");
                      continue;
                  }
  
  
-                // LB 12
-                //        GL x
                  if (fGL.contains(prevChar)) {
+                    setAppliedRule(pos, "LB 12  GL  x");
                      continue;
                  }
  
-                // LB 12a
-                //    [^SP BA HY] x GL
                  if (!(fSP.contains(prevChar) ||
                          fBA.contains(prevChar) ||
                          fHY.contains(prevChar)     ) && fGL.contains(thisChar)) {
+                    setAppliedRule(pos, "LB 12a [^SP BA HY] x GL");
                      continue;
                  }
  
-                // LB 13  Don't break before closings.
-                //
                  if (fCL.contains(thisChar) ||
                          fCP.contains(thisChar) ||
                          fEX.contains(thisChar) ||
                          fSY.contains(thisChar)) {
+                    setAppliedRule(pos, "LB 13  Don't break before closings");
                      continue;
                  }
  
-                // LB 14  Don't break after OP SP*
                  //       Scan backwards, checking for this sequence.
                  //       The OP char could include combining marks, so we actually check for
                  //           OP CM* SP* x
@@ -1043,24 +1084,23 @@ public class RBBITestMonkey extends TestFmwk {
                      tPos=moveIndex32(fText, tPos, -1);
                  }
                  if (fOP.contains(UTF16.charAt(fText, tPos))) {
+                    setAppliedRule(pos, "LB 14  Don't break after OP SP*");
                      continue;
                  }
  
-                // LB 14a Break before an IS that begins a number and follows a space
                  if (nextPos < fText.length()) {
                      int nextChar = fText.codePointAt(nextPos);
                      if (fSP.contains(prevChar) && fIS.contains(thisChar) && fNU.contains(nextChar)) {
+                        setAppliedRule(pos, "LB 14a Break before an IS that begins a number and follows a space");
                          break;
                      }
                  }
  
-                // LB14b Do not break before numeric separators, even after spaces.
                  if (fIS.contains(thisChar)) {
+                    setAppliedRule(pos, "LB 14b Do not break before numeric separators, even after spaces");
                      continue;
                  }
  
-                // LB 15 Do not break within "[
-                //       QU CM* SP* x OP
                  if (fOP.contains(thisChar)) {
                      // Scan backwards from prevChar to see if it is preceded by QU CM* SP*
                      tPos = prevPos;
@@ -1071,11 +1111,11 @@ public class RBBITestMonkey extends TestFmwk {
                          tPos = moveIndex32(fText, tPos, -1);
                      }
                      if (fQU.contains(UTF16.charAt(fText, tPos))) {
+                        setAppliedRule(pos, "LB 15  QU SP* x OP");
                          continue;
                      }
                  }
  
-                // LB 16   (CL | CP) SP* x NS
                  if (fNS.contains(thisChar)) {
                      tPos = prevPos;
                      while (tPos > 0 && fSP.contains(UTF16.charAt(fText, tPos))) {
@@ -1085,12 +1125,12 @@ public class RBBITestMonkey extends TestFmwk {
                          tPos = moveIndex32(fText, tPos, -1);
                      }
                      if (fCL.contains(UTF16.charAt(fText, tPos)) || fCP.contains(UTF16.charAt(fText, tPos))) {
+                        setAppliedRule(pos, "LB 16  (CL | CP) SP* x NS");
                          continue;
                      }
                  }
  
  
-                // LB 17        B2 SP* x B2
                  if (fB2.contains(thisChar)) {
                      tPos = prevPos;
                      while (tPos > 0 && fSP.contains(UTF16.charAt(fText, tPos))) {
@@ -1100,156 +1140,169 @@ public class RBBITestMonkey extends TestFmwk {
                          tPos = moveIndex32(fText, tPos, -1);
                      }
                      if (fB2.contains(UTF16.charAt(fText, tPos))) {
+                        setAppliedRule(pos, "LB 17  B2 SP* x B2");
                          continue;
                      }
                  }
  
-                // LB 18    break after space
                  if (fSP.contains(prevChar)) {
+                    setAppliedRule(pos, "LB 18  break after space");
                      break;
                  }
  
-                // LB 19
                  //    x   QU
                  //    QU  x
                  if (fQU.contains(thisChar) || fQU.contains(prevChar)) {
+                        setAppliedRule(pos, "LB 19");
                      continue;
                  }
  
-                // LB 20  Break around a CB
                  if (fCB.contains(thisChar) || fCB.contains(prevChar)) {
+                    setAppliedRule(pos, "LB 20  Break around a CB");
                      break;
                  }
  
-                // LB 20.09  Don't break between Hyphens and letters if a break precedes the hyphen.
+                //           Don't break between Hyphens and letters if a break precedes the hyphen.
                  //           Formerly this was a Finnish tailoring.
                  //           Moved to root in ICU 63. This is an ICU customization, not in UAX-14.
                  //    ^($HY | $HH) $AL;
                  if (fAL.contains(thisChar) && (fHY.contains(prevChar) || fHH.contains(prevChar)) &&
                          prevPosX2 == -1) {
+                    setAppliedRule(pos, "LB 20.09");
                      continue;
                  }
  
-                // LB 21
                  if (fBA.contains(thisChar) ||
                          fHY.contains(thisChar) ||
                          fNS.contains(thisChar) ||
                          fBB.contains(prevChar) )   {
+                    setAppliedRule(pos, "LB 21");
                      continue;
                  }
  
-                // LB 21a, HL (HY | BA) x
                  if (fHL.contains(prevCharX2) && (fHY.contains(prevChar) || fBA.contains(prevChar))) {
+                    setAppliedRule(pos, "LB 21a HL (HY | BA) x");
                      continue;
                  }
  
-                // LB 21b, SY x HL
                  if (fSY.contains(prevChar) && fHL.contains(thisChar)) {
+                    setAppliedRule(pos, "LB 21b SY x HL");
                      continue;
                  }
  
-                // LB 22
                  if (fIN.contains(thisChar)) {
+                    setAppliedRule(pos, "LB 22");
                      continue;
                  }
  
-                // LB 23    (AL | HL) x NU
+                //          (AL | HL) x NU
                  //          NU x (AL | HL)
                  if ((fAL.contains(prevChar) || fHL.contains(prevChar)) && fNU.contains(thisChar)) {
+                    setAppliedRule(pos, "LB 23");
                      continue;
                  }
                  if (fNU.contains(prevChar) && (fAL.contains(thisChar) || fHL.contains(thisChar))) {
+                    setAppliedRule(pos, "LB 23");
                      continue;
                  }
  
-                // LB 23a Do not break between numeric prefixes and ideographs, or between ideographs and numeric postfixes.
+                // Do not break between numeric prefixes and ideographs, or between ideographs and numeric postfixes.
                  //      PR x (ID | EB | EM)
                  //     (ID | EB | EM) x PO
                  if (fPR.contains(prevChar) &&
                          (fID.contains(thisChar) || fEB.contains(thisChar) || fEM.contains(thisChar)))  {
+                    setAppliedRule(pos, "LB 23a");
                      continue;
                  }
                  if ((fID.contains(prevChar) || fEB.contains(prevChar) || fEM.contains(prevChar)) &&
                          fPO.contains(thisChar)) {
+                    setAppliedRule(pos, "LB 23a");
                      continue;
                  }
  
-                // LB 24  Do not break between prefix and letters or ideographs.
+                // Do not break between prefix and letters or ideographs.
                  //         (PR | PO) x (AL | HL)
                  //         (AL | HL) x (PR | PO)
                  if ((fPR.contains(prevChar) || fPO.contains(prevChar)) &&
                          (fAL.contains(thisChar) || fHL.contains(thisChar))) {
+                    setAppliedRule(pos, "LB 24  no break between prefix and letters or ideographs");
                      continue;
                  }
                  if ((fAL.contains(prevChar) || fHL.contains(prevChar)) &&
                          (fPR.contains(thisChar) || fPO.contains(thisChar))) {
+                    setAppliedRule(pos, "LB 24  no break between prefix and letters or ideographs");
                      continue;
                  }
  
-                // LB 25  Numbers  match, moved up, before LB 8a.
+                // appliedRule: "LB 25 numbers match"; // moved up, before LB 8a,
  
-                // LB 26  Do not break Korean Syllables
                  if (fJL.contains(prevChar) && (fJL.contains(thisChar) ||
                          fJV.contains(thisChar) ||
                          fH2.contains(thisChar) ||
                          fH3.contains(thisChar))) {
+                    setAppliedRule(pos, "LB 26  Do not break a Korean syllable.");
                      continue;
                  }
  
                  if ((fJV.contains(prevChar) || fH2.contains(prevChar))  &&
                          (fJV.contains(thisChar) || fJT.contains(thisChar))) {
+                    setAppliedRule(pos, "LB 26  Do not break a Korean syllable.");
                      continue;
                  }
  
                  if ((fJT.contains(prevChar) || fH3.contains(prevChar)) &&
                          fJT.contains(thisChar)) {
+                    setAppliedRule(pos, "LB 26  Do not break a Korean syllable.");
                      continue;
                  }
  
-                // LB 27 Treat a Korean Syllable Block the same as ID
                  if ((fJL.contains(prevChar) || fJV.contains(prevChar) ||
                          fJT.contains(prevChar) || fH2.contains(prevChar) || fH3.contains(prevChar)) &&
                          fIN.contains(thisChar)) {
+                    setAppliedRule(pos, "LB 27  Treat a Korean Syllable Block the same as ID.");
                      continue;
                  }
                  if ((fJL.contains(prevChar) || fJV.contains(prevChar) ||
                          fJT.contains(prevChar) || fH2.contains(prevChar) || fH3.contains(prevChar)) &&
                          fPO.contains(thisChar)) {
+                    setAppliedRule(pos, "LB 27  Treat a Korean Syllable Block the same as ID.");
                      continue;
                  }
                  if (fPR.contains(prevChar) && (fJL.contains(thisChar) || fJV.contains(thisChar) ||
                          fJT.contains(thisChar) || fH2.contains(thisChar) || fH3.contains(thisChar))) {
+                    setAppliedRule(pos, "LB 27  Treat a Korean Syllable Block the same as ID.");
                      continue;
                  }
  
  
  
-                // LB 28 Do not break between alphabetics
                  if ((fAL.contains(prevChar) || fHL.contains(prevChar)) && (fAL.contains(thisChar) || fHL.contains(thisChar))) {
+                    setAppliedRule(pos, "LB 28  Do not break between alphabetics");
                      continue;
                  }
  
-                // LB 29  Do not break between numeric punctuation and alphabetics
                  if (fIS.contains(prevChar) && (fAL.contains(thisChar) || fHL.contains(thisChar))) {
+                    setAppliedRule(pos, "LB 29  Do not break between numeric punctuation and alphabetics");
                      continue;
                  }
  
-                // LB 30    Do not break between letters, numbers, or ordinary symbols and opening or closing punctuation.
                  //          (AL | NU) x OP
                  //          CP x (AL | NU)
                  if ((fAL.contains(prevChar) || fHL.contains(prevChar) || fNU.contains(prevChar)) &&
                          fOP30.contains(thisChar)) {
+                    setAppliedRule(pos, "LB 30  Do not break between letters, numbers, or ordinary symbols and opening or closing punctuation.");
                      continue;
                  }
                  if (fCP30.contains(prevChar) &&
                          (fAL.contains(thisChar) || fHL.contains(thisChar) || fNU.contains(thisChar))) {
+                    setAppliedRule(pos, "LB 30  Do not break between letters, numbers, or ordinary symbols and opening or closing punctuation.");
                      continue;
                  }
  
-                // LB 30a   Break between pairs of Regional Indicators.
                  //             RI RI  ÷  RI
                  //                RI  x  RI
                  if (fRI.contains(prevCharX2) && fRI.contains(prevChar) && fRI.contains(thisChar)) {
+                    setAppliedRule(pos, "LB 30a Break between pairs of Regional Indicators.");
                      break;
                  }
                  if (fRI.contains(prevChar) && fRI.contains(thisChar)) {
@@ -1257,14 +1310,16 @@ public class RBBITestMonkey extends TestFmwk {
                      // Over-write the trailing one (thisChar) to prevent it from forming another pair with a
                      // following RI. This is a hack.
                      thisChar = -1;
+                    setAppliedRule(pos, "LB 30a Break between pairs of Regional Indicators.");
                      continue;
                  }
  
-                // LB30b    Emoji Base x Emoji Modifier
                  if (fEB.contains(prevChar) && fEM.contains(thisChar)) {
+                    setAppliedRule(pos, "LB 30b Emoji Base x Emoji Modifier");
                      continue;
                  }
                  // LB 31    Break everywhere else
+                setAppliedRule(pos, "LB 31 Break everywhere else");
                  break;
              }
  
@@ -1447,7 +1502,6 @@ public class RBBITestMonkey extends TestFmwk {
       *
       */
      static class RBBISentenceMonkey extends RBBIMonkeyKind {
-        List                 fSets;
          StringBuffer         fText;
  
          UnicodeSet           fSepSet;
@@ -1464,13 +1518,9 @@ public class RBBITestMonkey extends TestFmwk {
          UnicodeSet           fOtherSet;
          UnicodeSet           fExtendSet;
  
-
-
          RBBISentenceMonkey() {
              fCharProperty  = UProperty.SENTENCE_BREAK;
  
-            fSets            = new ArrayList();
-
              //  Separator Set Note:  Beginning with Unicode 5.1, CR and LF were removed from the separator
              //                       set and made into character classes of their own.  For the monkey impl,
              //                       they remain in SEP, since Sep always appears with CR and LF in the rules.
@@ -1503,20 +1553,20 @@ public class RBBITestMonkey extends TestFmwk {
              fOtherSet.removeAll(fCloseSet);
              fOtherSet.removeAll(fExtendSet);
  
-            fSets.add(fSepSet);
-            fSets.add(fFormatSet);
-
-            fSets.add(fSpSet);
-            fSets.add(fLowerSet);
-            fSets.add(fUpperSet);
-            fSets.add(fOLetterSet);
-            fSets.add(fNumericSet);
-            fSets.add(fATermSet);
-            fSets.add(fSContinueSet);
-            fSets.add(fSTermSet);
-            fSets.add(fCloseSet);
-            fSets.add(fOtherSet);
-            fSets.add(fExtendSet);
+            fSets.add(fSepSet);         fClassNames.add("Sep");
+            fSets.add(fFormatSet);      fClassNames.add("Format");
+
+            fSets.add(fSpSet);          fClassNames.add("Sp");
+            fSets.add(fLowerSet);       fClassNames.add("Lower");
+            fSets.add(fUpperSet);       fClassNames.add("Upper");
+            fSets.add(fOLetterSet);     fClassNames.add("OLetter");
+            fSets.add(fNumericSet);     fClassNames.add("Numeric");
+            fSets.add(fATermSet);       fClassNames.add("ATerm");
+            fSets.add(fSContinueSet);   fClassNames.add("SContinue");
+            fSets.add(fSTermSet);       fClassNames.add("STerm");
+            fSets.add(fCloseSet);       fClassNames.add("Close");
+            fSets.add(fOtherSet);       fClassNames.add("Other");
+            fSets.add(fExtendSet);      fClassNames.add("Extend");
          }
  
  
@@ -1528,6 +1578,7 @@ public class RBBITestMonkey extends TestFmwk {
          @Override
          void   setText(StringBuffer s) {
              fText = s;
+            prepareAppliedRules(s.length());
          }
  
  
@@ -1598,43 +1649,44 @@ public class RBBITestMonkey extends TestFmwk {
                  p1 = p2;  c1 = c2;
                  p2 = p3;  c2 = c3;
  
-                // Advancd p3 by  X(Extend | Format)*   Rule 4
+                // Advance p3 by  X(Extend | Format)*   Rule 4
                  p3 = moveForward(p3);
                  c3 = cAt(p3);
  
-                // Rule (3) CR x LF
                  if (c1==0x0d && c2==0x0a && p2==(p1+1)) {
+                    setAppliedRule(p2, "SB3   CR x LF");
                      continue;
                  }
  
-                // Rule (4)    Sep  <break>
                  if (fSepSet.contains(c1)) {
                      p2 = p1+1;   // Separators don't combine with Extend or Format
+                    setAppliedRule(p2, "SB4   Sep  <break>");
                      break;
                  }
  
                  if (p2 >= fText.length()) {
                      // Reached end of string.  Always a break position.
+                    setAppliedRule(p2, "SB4   Sep  <break>");
                      break;
                  }
  
                  if (p2 == prevPos) {
                      // Still warming up the loop.  (won't work with zero length strings, but we don't care)
+                    setAppliedRule(p2, "SB4   Sep  <break>");
                      continue;
                  }
  
-                // Rule (6).   ATerm x Numeric
                  if (fATermSet.contains(c1) &&  fNumericSet.contains(c2))  {
+                    setAppliedRule(p2, "SB6   ATerm x Numeric");
                      continue;
                  }
  
-                // Rule (7).  (Upper | Lower) ATerm  x  Uppper
                  if ((fUpperSet.contains(c0) || fLowerSet.contains(c0)) &&
                          fATermSet.contains(c1) && fUpperSet.contains(c2)) {
+                    setAppliedRule(p2, "SB7   (Upper | Lower) ATerm  x  Uppper");
                      continue;
                  }
  
-                // Rule (8)  ATerm Close* Sp*  x  (not (OLettter | Upper | Lower | Sep))* Lower
                  //           Note:  Sterm | ATerm are added to the negated part of the expression by a
                  //                  note to the Unicode 5.0 documents.
                  int p8 = p1;
@@ -1652,16 +1704,17 @@ public class RBBITestMonkey extends TestFmwk {
                                  fLowerSet.contains(c) || fSepSet.contains(c) ||
                                  fATermSet.contains(c) || fSTermSet.contains(c))
                          {
+                            setAppliedRule(p2, "SB8   ATerm Close* Sp*  x  (not (OLettter | Upper | Lower | Sep))* Lower");
                              break;
                          }
                          p8 = moveForward(p8);
                      }
                      if (p8<fText.length() && fLowerSet.contains(cAt(p8))) {
+                        setAppliedRule(p2, "SB8   ATerm Close* Sp*  x  (not (OLettter | Upper | Lower | Sep))* Lower");
                          continue;
                      }
                  }
  
-                // Rule 8a  (STerm | ATerm) Close* Sp* x (SContinue | Sterm | ATerm)
                  if (fSContinueSet.contains(c2) || fSTermSet.contains(c2) || fATermSet.contains(c2)) {
                      p8 = p1;
                      while (setContains(fSpSet, cAt(p8))) {
@@ -1672,12 +1725,12 @@ public class RBBITestMonkey extends TestFmwk {
                      }
                      c = cAt(p8);
                      if (setContains(fSTermSet, c) || setContains(fATermSet, c)) {
+                        setAppliedRule(p2, "SB8a  (STerm | ATerm) Close* Sp* x (SContinue | Sterm | ATerm)");
                          continue;
                      }
                  }
  
  
-                // Rule (9)  (STerm | ATerm) Close*  x  (Close | Sp | Sep | CR | LF)
                  int p9 = p1;
                  while (p9>0 && fCloseSet.contains(cAt(p9))) {
                      p9 = moveBack(p9);
@@ -1685,11 +1738,11 @@ public class RBBITestMonkey extends TestFmwk {
                  c = cAt(p9);
                  if ((fSTermSet.contains(c) || fATermSet.contains(c))) {
                      if (fCloseSet.contains(c2) || fSpSet.contains(c2) || fSepSet.contains(c2)) {
+                        setAppliedRule(p2, "SB9   (STerm | ATerm) Close*  x  (Close | Sp | Sep | CR | LF)");
                          continue;
                      }
                  }
  
-                // Rule (10)  (Sterm | ATerm) Close* Sp*  x  (Sp | Sep | CR | LF)
                  int p10 = p1;
                  while (p10>0 && fSpSet.contains(cAt(p10))) {
                      p10 = moveBack(p10);
@@ -1699,11 +1752,11 @@ public class RBBITestMonkey extends TestFmwk {
                  }
                  if (fSTermSet.contains(cAt(p10)) || fATermSet.contains(cAt(p10))) {
                      if (fSpSet.contains(c2) || fSepSet.contains(c2)) {
+                        setAppliedRule(p2, "SB10  (Sterm | ATerm) Close* Sp*  x  (Sp | Sep | CR | LF)");
                          continue;
                      }
                  }
  
-                // Rule (11)  (STerm | ATerm) Close* Sp*   <break>
                  int p11 = p1;
                  if (p11>0 && fSepSet.contains(cAt(p11))) {
                      p11 = moveBack(p11);
@@ -1715,18 +1768,16 @@ public class RBBITestMonkey extends TestFmwk {
                      p11 = moveBack(p11);
                  }
                  if (fSTermSet.contains(cAt(p11)) || fATermSet.contains(cAt(p11))) {
+                    setAppliedRule(p2, "SB11  (STerm | ATerm) Close* Sp*   <break>");
                      break;
                  }
  
-                //  Rule (12)  Any x Any
+                setAppliedRule(p2, "SB12  Any x Any");
                  continue;
              }
              breakPos = p2;
              return breakPos;
          }
-
-
-
      }
  
  
@@ -1876,7 +1927,6 @@ public class RBBITestMonkey extends TestFmwk {
          StringBuffer     testText         = new StringBuffer();
          int              numCharClasses;
          List             chClasses;
-        int[]            expected         = new int[TESTSTRINGLEN*2 + 1];
          int              expectedCount    = 0;
          boolean[]        expectedBreaks   = new boolean[TESTSTRINGLEN*2 + 1];
          boolean[]        forwardBreaks    = new boolean[TESTSTRINGLEN*2 + 1];
@@ -1923,6 +1973,9 @@ public class RBBITestMonkey extends TestFmwk {
          //
          //--------------------------------------------------------------------------------------------
  
+        // For minimizing width of class name output.
+        int classNameSize = mk.maxClassNameSize();
+
          int  dotsOnLine = 0;
          while (loopCount < numIterations || numIterations == -1) {
              if (numIterations == -1 && loopCount % 10 == 0) {
@@ -1966,7 +2019,6 @@ public class RBBITestMonkey extends TestFmwk {
                  System.out.println();
              }
  
-            Arrays.fill(expected, 0);
              Arrays.fill(expectedBreaks, false);
              Arrays.fill(forwardBreaks, false);
              Arrays.fill(reverseBreaks, false);
@@ -1974,11 +2026,10 @@ public class RBBITestMonkey extends TestFmwk {
              Arrays.fill(followingBreaks, false);
              Arrays.fill(precedingBreaks, false);
  
-            // Calculate the expected results for this test string.
+            // Calculate the expected results for this test string and reset applied rules.
              mk.setText(testText);
              expectedCount = 0;
              expectedBreaks[0] = true;
-            expected[expectedCount ++] = 0;
              int breakPos = 0;
              int lastBreakPos = -1;
              for (;;) {
@@ -1995,7 +2046,6 @@ public class RBBITestMonkey extends TestFmwk {
                      // break;
                  }
                  expectedBreaks[breakPos] = true;
-                expected[expectedCount ++] = breakPos;
              }
  
              // Find the break positions using forward iteration
@@ -2076,16 +2126,22 @@ public class RBBITestMonkey extends TestFmwk {
              // Compare the expected and actual results.
              for (i=0; i<=testText.length(); i++) {
                  String errorType = null;
+                boolean[] currentBreakData = null;
                  if  (forwardBreaks[i] != expectedBreaks[i]) {
                      errorType = "next()";
+                    currentBreakData = forwardBreaks;
                  } else if (reverseBreaks[i] != forwardBreaks[i]) {
                      errorType = "previous()";
+                    currentBreakData = reverseBreaks;
                  } else if (isBoundaryBreaks[i] != expectedBreaks[i]) {
                      errorType = "isBoundary()";
+                    currentBreakData = isBoundaryBreaks;
                  } else if (followingBreaks[i] != expectedBreaks[i]) {
                      errorType = "following()";
+                    currentBreakData = followingBreaks;
                  } else if (precedingBreaks[i] != expectedBreaks[i]) {
                      errorType = "preceding()";
+                    currentBreakData = precedingBreaks;
                  }
  
                  if (errorType != null) {
@@ -2119,43 +2175,47 @@ public class RBBITestMonkey extends TestFmwk {
                          }
                      }
  
-                    // Format looks like   "<data><>\uabcd\uabcd<>\U0001abcd...</data>"
-                    StringBuffer errorText = new StringBuffer();
-
-                    int      c;    // Char from test data
+                    // Formatting of each line includes:
+                    //   character code
+                    //   reference break: '|' -> a break, '.' -> no break
+                    //   actual break:    '|' -> a break, '.' -> no break
+                    //   (name of character clase)
+                    //   Unicode name of character
+                    //   '--→' indicates location of the difference.
+
+                    StringBuilder buffer = new StringBuilder();
+                    buffer.append("\n")
+                        .append((expectedBreaks[i] ? "Break expected but not found." : "Break found but not expected."))
+                        .append(
+                            String.format(" at index %d. Parameters to reproduce: @\"type=%s  seed=%d  loop=1\"\n",
+                              i, name, seed));
+
+                    int c;  // Char from test data
                      for (ci = startContext;  ci <= endContext && ci != -1;  ci = nextCP(testText, ci)) {
-                        if (ci == i) {
-                            // This is the location of the error.
-                            errorText.append("<?>---------------------------------\n");
-                        } else if (expectedBreaks[ci]) {
-                            // This a non-error expected break position.
-                            errorText.append("------------------------------------\n");
-                        }
-                        if (ci < testText.length()) {
-                            c = UTF16.charAt(testText, ci);
-                            appendCharToBuf(errorText, c, 11);
-                            String gc = UCharacter.getPropertyValueName(UProperty.GENERAL_CATEGORY, UCharacter.getType(c), UProperty.NameChoice.SHORT);
-                            appendToBuf(errorText, gc, 8);
-                            int extraProp = UCharacter.getIntPropertyValue(c, mk.fCharProperty);
-                            String extraPropValue =
-                                    UCharacter.getPropertyValueName(mk.fCharProperty, extraProp, UProperty.NameChoice.LONG);
-                            appendToBuf(errorText, extraPropValue, 20);
-
-                            String charName = UCharacter.getExtendedName(c);
-                            appendToBuf(errorText, charName, 40);
-                            errorText.append('\n');
+
+                        c = testText.codePointAt(ci);
+                        buffer.append((ci == i) ? " --→" : "    ")
+                            .append(String.format(" %3d : ", ci))
+                            .append(!expectedBreaks[ci] ? " . " : " | ")  // Reference break
+                            .append(!currentBreakData[ci] ? " . " : " | "); // Actual break
+
+                        // BMP or SMP character in hex
+                        if (c >= 0x10000) {
+                            buffer.append("\\U").append(String.format("%08x", (int) c));
+                        } else {
+                            buffer.append("    \\u").append(String.format("%04x", (int) c));
                          }
+
+                        buffer.append(
+                            String.format(String.format(" %%-%ds", (int) classNameSize),
+                              mk.classNameFromCodepoint(c)))
+                            .append(String.format(" %-40s", mk.getAppliedRule(ci)))
+                            .append(String.format(" %-40s\n", UCharacter.getExtendedName(c)));
+
+                        if (ci >= endContext) { break; }
                      }
-                    if (ci == testText.length() && ci != -1) {
-                        errorText.append("<>");
-                    }
-                    errorText.append("</data>\n");
+                    errln(buffer.toString());
  
-                    // Output the error
-                    errln(name + " break monkey test error.  " +
-                            (expectedBreaks[i]? "Break expected but not found." : "Break found but not expected.") +
-                            "\nOperation = " + errorType + "; random seed = " + seed + ";  buf Idx = " + i + "\n" +
-                            errorText);
                      break;
                  }
              }
author	Frank Tang <ftang@chromium.org>
	Mon, 9 Mar 2020 16:36:52 +0000 (16:36 +0000)
committer	Frank Yung-Fong Tang <41213225+FrankYFTang@users.noreply.github.com>
	Tue, 10 Mar 2020 03:17:00 +0000 (20:17 -0700)