From b552700cc621920edc968f67ffe28bb0fc430668 Mon Sep 17 00:00:00 2001
From: Andy Heninger <andy.heninger@gmail.com>
Date: Sun, 28 Feb 2016 19:14:48 +0000
Subject: [PATCH] ICU-12081 RBBI extensions & Emoji rules. Import rule data to
 Java from C++, port code changes.

X-SVN-Rev: 38422
---
 .../core/src/com/ibm/icu/text/RBBINode.java   |  37 +-
 .../com/ibm/icu/text/RBBIRuleParseTable.java  | 243 +++----
 .../src/com/ibm/icu/text/RBBIRuleScanner.java |  78 ++-
 .../com/ibm/icu/text/RBBITableBuilder.java    | 115 ++--
 .../ibm/icu/text/RuleBasedBreakIterator.java  | 442 ++++++------
 icu4j/main/shared/data/icudata.jar            |   4 +-
 icu4j/main/shared/data/icutzdata.jar          |   2 +-
 .../ibm/icu/dev/test/rbbi/RBBITestMonkey.java | 646 ++++++++++--------
 8 files changed, 865 insertions(+), 702 deletions(-)
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBINode.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBINode.java
index 028e23b8386..cefbbd02166 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBINode.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBINode.java
@@ -1,6 +1,6 @@
 /********************************************************************
  * COPYRIGHT:
- * Copyright (c) 2001-2010, International Business Machines Corporation and
+ * Copyright (c) 2001-2016, International Business Machines Corporation and
  * others. All Rights Reserved.
  ********************************************************************/
 
@@ -17,7 +17,7 @@ import com.ibm.icu.impl.Assert;
  */
 class RBBINode {
 
-    
+
  //   enum NodeType {
      static final int    setRef = 0;
      static final int    uset = 1;
@@ -36,7 +36,7 @@ class RBBINode {
      static final int    opReverse = 14;
      static final int    opLParen = 15;
      static final int    nodeTypeLimit = 16;    //  For Assertion checking only.
-     
+
      static final String []  nodeTypeNames = {
          "setRef",
          "uset",
@@ -56,20 +56,20 @@ class RBBINode {
          "opLParen"
      };
 
-//    enum OpPrecedence {      
+//    enum OpPrecedence {
     static final int    precZero   = 0;
     static final int    precStart  = 1;
     static final int    precLParen = 2;
     static final int    precOpOr   = 3;
     static final int    precOpCat  = 4;
-        
+
     int          fType;   // enum NodeType
     RBBINode      fParent;
     RBBINode      fLeftChild;
     RBBINode      fRightChild;
     UnicodeSet    fInputSet;           // For uset nodes only.
     int          fPrecedence = precZero;   // enum OpPrecedence, For binary ops only.
-    
+
     String       fText;                 // Text corresponding to this node.
                                         //   May be lazily evaluated when (if) needed
                                         //   for some node types.
@@ -89,12 +89,17 @@ class RBBINode {
                                         //   state transition table.
 
     boolean      fLookAheadEnd;        // For endMark nodes, set TRUE if
-                                        //   marking the end of a look-ahead rule.
+                                       //   marking the end of a look-ahead rule.
+
+    boolean      fRuleRoot;             // True if this node is the root of a rule.
+    boolean      fChainIn;              // True if chaining into this rule is allowed
+                                        //     (no '^' present).
+
 
     Set<RBBINode> fFirstPosSet;         // See Aho DFA table generation algorithm
-    Set<RBBINode> fLastPosSet;          // See Aho.       
+    Set<RBBINode> fLastPosSet;          // See Aho.
     Set<RBBINode> fFollowPos;           // See Aho.
-    
+
     int           fSerialNum;           //  Debugging aids.  Each node gets a unique serial number.
     static int    gLastSerial;
 
@@ -129,6 +134,8 @@ class RBBINode {
         fLastPos = other.fLastPos;
         fNullable = other.fNullable;
         fVal = other.fVal;
+        fRuleRoot = false;
+        fChainIn = other.fChainIn;
         fFirstPosSet = new HashSet<RBBINode>(other.fFirstPosSet);
         fLastPosSet = new HashSet<RBBINode>(other.fLastPosSet);
         fFollowPos = new HashSet<RBBINode>(other.fFollowPos);
@@ -163,6 +170,8 @@ class RBBINode {
                 n.fRightChild.fParent = n;
             }
         }
+        n.fRuleRoot = this.fRuleRoot;
+        n.fChainIn  = this.fChainIn;
         return n;
     }
 
@@ -259,8 +268,8 @@ class RBBINode {
         }
     }
 
-    
- 
+
+
     //-------------------------------------------------------------------------
     //
     //        print. Print out a single node, for debugging.
@@ -279,7 +288,7 @@ class RBBINode {
             RBBINode.printInt(n.fRightChild==null? 0 : n.fRightChild.fSerialNum, 12);
             RBBINode.printInt(n.fFirstPos, 12);
             RBBINode.printInt(n.fVal, 7);
-            
+
             if (n.fType == varRef) {
                 System.out.print(" " + n.fText);
             }
@@ -287,7 +296,7 @@ class RBBINode {
         System.out.println("");
     }
     ///CLOVER:ON
- 
+
 
     // Print a String in a fixed field size.
     // Debugging function.
@@ -344,7 +353,7 @@ class RBBINode {
                 if (fLeftChild != null) {
                     fLeftChild.printTree(false);
                 }
-                
+
                 if (fRightChild != null) {
                     fRightChild.printTree(false);
                 }
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleParseTable.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleParseTable.java
index b2f78d6fc97..53cd225e283 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleParseTable.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleParseTable.java
@@ -1,7 +1,7 @@
 /*
  *******************************************************************************
- * Copyright (C) 2003-2010, International Business Machines Corporation and
- * others. All Rights Reserved.
+ * Copyright (c) 2003-2016, International Business Machines
+ * Corporation and others. All Rights Reserved.
  *******************************************************************************
  */
  
@@ -13,6 +13,8 @@ package com.ibm.icu.text;
  * rule parser.
  * It is generated by the Perl script "rbbicst.pl" from
  * the rule parser state definitions file "rbbirpt.txt".
+ * @internal 
+ *
  */
 class RBBIRuleParseTable
 {
@@ -29,24 +31,25 @@ class RBBIRuleParseTable
      static final short doExprStart = 11;
      static final short doLParen = 12;
      static final short doNOP = 13;
-     static final short doOptionEnd = 14;
-     static final short doOptionStart = 15;
-     static final short doReverseDir = 16;
-     static final short doRuleChar = 17;
-     static final short doRuleError = 18;
-     static final short doRuleErrorAssignExpr = 19;
-     static final short doScanUnicodeSet = 20;
-     static final short doSlash = 21;
-     static final short doStartAssign = 22;
-     static final short doStartTagValue = 23;
-     static final short doStartVariableName = 24;
-     static final short doTagDigit = 25;
-     static final short doTagExpectedError = 26;
-     static final short doTagValue = 27;
-     static final short doUnaryOpPlus = 28;
-     static final short doUnaryOpQuestion = 29;
-     static final short doUnaryOpStar = 30;
-     static final short doVariableNameExpectedErr = 31;
+     static final short doNoChain = 14;
+     static final short doOptionEnd = 15;
+     static final short doOptionStart = 16;
+     static final short doReverseDir = 17;
+     static final short doRuleChar = 18;
+     static final short doRuleError = 19;
+     static final short doRuleErrorAssignExpr = 20;
+     static final short doScanUnicodeSet = 21;
+     static final short doSlash = 22;
+     static final short doStartAssign = 23;
+     static final short doStartTagValue = 24;
+     static final short doStartVariableName = 25;
+     static final short doTagDigit = 26;
+     static final short doTagExpectedError = 27;
+     static final short doTagValue = 28;
+     static final short doUnaryOpPlus = 29;
+     static final short doUnaryOpQuestion = 30;
+     static final short doUnaryOpStar = 31;
+     static final short doVariableNameExpectedErr = 32;
  
      static final short kRuleSet_default = 255;
      static final short kRuleSet_digit_char = 128;
@@ -73,104 +76,112 @@ class RBBIRuleParseTable
       fNextChar  = nc; 
       fStateName = sn; 
    } 
-   } 
+   }; 
   
     static RBBIRuleTableElement[] gRuleParseStateTable = { 
        new RBBIRuleTableElement(doNOP, 0, 0,0,  true,   null )     //  0 
-     , new RBBIRuleTableElement(doExprStart, 254, 21, 8, false,   "start")     //  1 
+     , new RBBIRuleTableElement(doExprStart, 254, 29, 9, false,   "start")     //  1 
      , new RBBIRuleTableElement(doNOP, 132, 1,0,  true,   null )     //  2 
-     , new RBBIRuleTableElement(doExprStart,'$',  80, 90, false,   null )     //  3 
-     , new RBBIRuleTableElement(doNOP,'!',  11,0,  true,   null )     //  4 
-     , new RBBIRuleTableElement(doNOP,';',  1,0,  true,   null )     //  5 
-     , new RBBIRuleTableElement(doNOP, 252, 0,0,  false,   null )     //  6 
-     , new RBBIRuleTableElement(doExprStart, 255, 21, 8, false,   null )     //  7 
-     , new RBBIRuleTableElement(doEndOfRule,';',  1,0,  true,   "break-rule-end")     //  8 
-     , new RBBIRuleTableElement(doNOP, 132, 8,0,  true,   null )     //  9 
-     , new RBBIRuleTableElement(doRuleError, 255, 95,0,  false,   null )     //  10 
-     , new RBBIRuleTableElement(doNOP,'!',  13,0,  true,   "rev-option")     //  11 
-     , new RBBIRuleTableElement(doReverseDir, 255, 20, 8, false,   null )     //  12 
-     , new RBBIRuleTableElement(doOptionStart, 130, 15,0,  true,   "option-scan1")     //  13 
-     , new RBBIRuleTableElement(doRuleError, 255, 95,0,  false,   null )     //  14 
-     , new RBBIRuleTableElement(doNOP, 129, 15,0,  true,   "option-scan2")     //  15 
-     , new RBBIRuleTableElement(doOptionEnd, 255, 17,0,  false,   null )     //  16 
-     , new RBBIRuleTableElement(doNOP,';',  1,0,  true,   "option-scan3")     //  17 
-     , new RBBIRuleTableElement(doNOP, 132, 17,0,  true,   null )     //  18 
-     , new RBBIRuleTableElement(doRuleError, 255, 95,0,  false,   null )     //  19 
-     , new RBBIRuleTableElement(doExprStart, 255, 21, 8, false,   "reverse-rule")     //  20 
-     , new RBBIRuleTableElement(doRuleChar, 254, 30,0,  true,   "term")     //  21 
-     , new RBBIRuleTableElement(doNOP, 132, 21,0,  true,   null )     //  22 
-     , new RBBIRuleTableElement(doRuleChar, 131, 30,0,  true,   null )     //  23 
-     , new RBBIRuleTableElement(doNOP,'[',  86, 30, false,   null )     //  24 
-     , new RBBIRuleTableElement(doLParen,'(',  21, 30, true,   null )     //  25 
-     , new RBBIRuleTableElement(doNOP,'$',  80, 29, false,   null )     //  26 
-     , new RBBIRuleTableElement(doDotAny,'.',  30,0,  true,   null )     //  27 
-     , new RBBIRuleTableElement(doRuleError, 255, 95,0,  false,   null )     //  28 
-     , new RBBIRuleTableElement(doCheckVarDef, 255, 30,0,  false,   "term-var-ref")     //  29 
-     , new RBBIRuleTableElement(doNOP, 132, 30,0,  true,   "expr-mod")     //  30 
-     , new RBBIRuleTableElement(doUnaryOpStar,'*',  35,0,  true,   null )     //  31 
-     , new RBBIRuleTableElement(doUnaryOpPlus,'+',  35,0,  true,   null )     //  32 
-     , new RBBIRuleTableElement(doUnaryOpQuestion,'?',  35,0,  true,   null )     //  33 
-     , new RBBIRuleTableElement(doNOP, 255, 35,0,  false,   null )     //  34 
-     , new RBBIRuleTableElement(doExprCatOperator, 254, 21,0,  false,   "expr-cont")     //  35 
-     , new RBBIRuleTableElement(doNOP, 132, 35,0,  true,   null )     //  36 
-     , new RBBIRuleTableElement(doExprCatOperator, 131, 21,0,  false,   null )     //  37 
-     , new RBBIRuleTableElement(doExprCatOperator,'[',  21,0,  false,   null )     //  38 
-     , new RBBIRuleTableElement(doExprCatOperator,'(',  21,0,  false,   null )     //  39 
-     , new RBBIRuleTableElement(doExprCatOperator,'$',  21,0,  false,   null )     //  40 
-     , new RBBIRuleTableElement(doExprCatOperator,'.',  21,0,  false,   null )     //  41 
-     , new RBBIRuleTableElement(doExprCatOperator,'/',  47,0,  false,   null )     //  42 
-     , new RBBIRuleTableElement(doExprCatOperator,'{',  59,0,  true,   null )     //  43 
-     , new RBBIRuleTableElement(doExprOrOperator,'|',  21,0,  true,   null )     //  44 
-     , new RBBIRuleTableElement(doExprRParen,')',  255,0,  true,   null )     //  45 
-     , new RBBIRuleTableElement(doExprFinished, 255, 255,0,  false,   null )     //  46 
-     , new RBBIRuleTableElement(doSlash,'/',  49,0,  true,   "look-ahead")     //  47 
-     , new RBBIRuleTableElement(doNOP, 255, 95,0,  false,   null )     //  48 
-     , new RBBIRuleTableElement(doExprCatOperator, 254, 21,0,  false,   "expr-cont-no-slash")     //  49 
-     , new RBBIRuleTableElement(doNOP, 132, 35,0,  true,   null )     //  50 
-     , new RBBIRuleTableElement(doExprCatOperator, 131, 21,0,  false,   null )     //  51 
-     , new RBBIRuleTableElement(doExprCatOperator,'[',  21,0,  false,   null )     //  52 
-     , new RBBIRuleTableElement(doExprCatOperator,'(',  21,0,  false,   null )     //  53 
-     , new RBBIRuleTableElement(doExprCatOperator,'$',  21,0,  false,   null )     //  54 
-     , new RBBIRuleTableElement(doExprCatOperator,'.',  21,0,  false,   null )     //  55 
-     , new RBBIRuleTableElement(doExprOrOperator,'|',  21,0,  true,   null )     //  56 
-     , new RBBIRuleTableElement(doExprRParen,')',  255,0,  true,   null )     //  57 
-     , new RBBIRuleTableElement(doExprFinished, 255, 255,0,  false,   null )     //  58 
-     , new RBBIRuleTableElement(doNOP, 132, 59,0,  true,   "tag-open")     //  59 
-     , new RBBIRuleTableElement(doStartTagValue, 128, 62,0,  false,   null )     //  60 
-     , new RBBIRuleTableElement(doTagExpectedError, 255, 95,0,  false,   null )     //  61 
-     , new RBBIRuleTableElement(doNOP, 132, 66,0,  true,   "tag-value")     //  62 
-     , new RBBIRuleTableElement(doNOP,'}',  66,0,  false,   null )     //  63 
-     , new RBBIRuleTableElement(doTagDigit, 128, 62,0,  true,   null )     //  64 
-     , new RBBIRuleTableElement(doTagExpectedError, 255, 95,0,  false,   null )     //  65 
-     , new RBBIRuleTableElement(doNOP, 132, 66,0,  true,   "tag-close")     //  66 
-     , new RBBIRuleTableElement(doTagValue,'}',  69,0,  true,   null )     //  67 
-     , new RBBIRuleTableElement(doTagExpectedError, 255, 95,0,  false,   null )     //  68 
-     , new RBBIRuleTableElement(doExprCatOperator, 254, 21,0,  false,   "expr-cont-no-tag")     //  69 
-     , new RBBIRuleTableElement(doNOP, 132, 69,0,  true,   null )     //  70 
-     , new RBBIRuleTableElement(doExprCatOperator, 131, 21,0,  false,   null )     //  71 
-     , new RBBIRuleTableElement(doExprCatOperator,'[',  21,0,  false,   null )     //  72 
-     , new RBBIRuleTableElement(doExprCatOperator,'(',  21,0,  false,   null )     //  73 
-     , new RBBIRuleTableElement(doExprCatOperator,'$',  21,0,  false,   null )     //  74 
-     , new RBBIRuleTableElement(doExprCatOperator,'.',  21,0,  false,   null )     //  75 
-     , new RBBIRuleTableElement(doExprCatOperator,'/',  47,0,  false,   null )     //  76 
-     , new RBBIRuleTableElement(doExprOrOperator,'|',  21,0,  true,   null )     //  77 
-     , new RBBIRuleTableElement(doExprRParen,')',  255,0,  true,   null )     //  78 
-     , new RBBIRuleTableElement(doExprFinished, 255, 255,0,  false,   null )     //  79 
-     , new RBBIRuleTableElement(doStartVariableName,'$',  82,0,  true,   "scan-var-name")     //  80 
-     , new RBBIRuleTableElement(doNOP, 255, 95,0,  false,   null )     //  81 
-     , new RBBIRuleTableElement(doNOP, 130, 84,0,  true,   "scan-var-start")     //  82 
-     , new RBBIRuleTableElement(doVariableNameExpectedErr, 255, 95,0,  false,   null )     //  83 
-     , new RBBIRuleTableElement(doNOP, 129, 84,0,  true,   "scan-var-body")     //  84 
-     , new RBBIRuleTableElement(doEndVariableName, 255, 255,0,  false,   null )     //  85 
-     , new RBBIRuleTableElement(doScanUnicodeSet,'[',  255,0,  true,   "scan-unicode-set")     //  86 
-     , new RBBIRuleTableElement(doScanUnicodeSet,'p',  255,0,  true,   null )     //  87 
-     , new RBBIRuleTableElement(doScanUnicodeSet,'P',  255,0,  true,   null )     //  88 
-     , new RBBIRuleTableElement(doNOP, 255, 95,0,  false,   null )     //  89 
-     , new RBBIRuleTableElement(doNOP, 132, 90,0,  true,   "assign-or-rule")     //  90 
-     , new RBBIRuleTableElement(doStartAssign,'=',  21, 93, true,   null )     //  91 
-     , new RBBIRuleTableElement(doNOP, 255, 29, 8, false,   null )     //  92 
-     , new RBBIRuleTableElement(doEndAssign,';',  1,0,  true,   "assign-end")     //  93 
-     , new RBBIRuleTableElement(doRuleErrorAssignExpr, 255, 95,0,  false,   null )     //  94 
-     , new RBBIRuleTableElement(doExit, 255, 95,0,  true,   "errorDeath")     //  95 
+     , new RBBIRuleTableElement(doNoChain,'^',  12, 9, true,   null )     //  3 
+     , new RBBIRuleTableElement(doExprStart,'$',  88, 98, false,   null )     //  4 
+     , new RBBIRuleTableElement(doNOP,'!',  19,0,  true,   null )     //  5 
+     , new RBBIRuleTableElement(doNOP,';',  1,0,  true,   null )     //  6 
+     , new RBBIRuleTableElement(doNOP, 252, 0,0,  false,   null )     //  7 
+     , new RBBIRuleTableElement(doExprStart, 255, 29, 9, false,   null )     //  8 
+     , new RBBIRuleTableElement(doEndOfRule,';',  1,0,  true,   "break-rule-end")     //  9 
+     , new RBBIRuleTableElement(doNOP, 132, 9,0,  true,   null )     //  10 
+     , new RBBIRuleTableElement(doRuleError, 255, 103,0,  false,   null )     //  11 
+     , new RBBIRuleTableElement(doExprStart, 254, 29,0,  false,   "start-after-caret")     //  12 
+     , new RBBIRuleTableElement(doNOP, 132, 12,0,  true,   null )     //  13 
+     , new RBBIRuleTableElement(doRuleError,'^',  103,0,  false,   null )     //  14 
+     , new RBBIRuleTableElement(doExprStart,'$',  88, 37, false,   null )     //  15 
+     , new RBBIRuleTableElement(doRuleError,';',  103,0,  false,   null )     //  16 
+     , new RBBIRuleTableElement(doRuleError, 252, 103,0,  false,   null )     //  17 
+     , new RBBIRuleTableElement(doExprStart, 255, 29,0,  false,   null )     //  18 
+     , new RBBIRuleTableElement(doNOP,'!',  21,0,  true,   "rev-option")     //  19 
+     , new RBBIRuleTableElement(doReverseDir, 255, 28, 9, false,   null )     //  20 
+     , new RBBIRuleTableElement(doOptionStart, 130, 23,0,  true,   "option-scan1")     //  21 
+     , new RBBIRuleTableElement(doRuleError, 255, 103,0,  false,   null )     //  22 
+     , new RBBIRuleTableElement(doNOP, 129, 23,0,  true,   "option-scan2")     //  23 
+     , new RBBIRuleTableElement(doOptionEnd, 255, 25,0,  false,   null )     //  24 
+     , new RBBIRuleTableElement(doNOP,';',  1,0,  true,   "option-scan3")     //  25 
+     , new RBBIRuleTableElement(doNOP, 132, 25,0,  true,   null )     //  26 
+     , new RBBIRuleTableElement(doRuleError, 255, 103,0,  false,   null )     //  27 
+     , new RBBIRuleTableElement(doExprStart, 255, 29, 9, false,   "reverse-rule")     //  28 
+     , new RBBIRuleTableElement(doRuleChar, 254, 38,0,  true,   "term")     //  29 
+     , new RBBIRuleTableElement(doNOP, 132, 29,0,  true,   null )     //  30 
+     , new RBBIRuleTableElement(doRuleChar, 131, 38,0,  true,   null )     //  31 
+     , new RBBIRuleTableElement(doNOP,'[',  94, 38, false,   null )     //  32 
+     , new RBBIRuleTableElement(doLParen,'(',  29, 38, true,   null )     //  33 
+     , new RBBIRuleTableElement(doNOP,'$',  88, 37, false,   null )     //  34 
+     , new RBBIRuleTableElement(doDotAny,'.',  38,0,  true,   null )     //  35 
+     , new RBBIRuleTableElement(doRuleError, 255, 103,0,  false,   null )     //  36 
+     , new RBBIRuleTableElement(doCheckVarDef, 255, 38,0,  false,   "term-var-ref")     //  37 
+     , new RBBIRuleTableElement(doNOP, 132, 38,0,  true,   "expr-mod")     //  38 
+     , new RBBIRuleTableElement(doUnaryOpStar,'*',  43,0,  true,   null )     //  39 
+     , new RBBIRuleTableElement(doUnaryOpPlus,'+',  43,0,  true,   null )     //  40 
+     , new RBBIRuleTableElement(doUnaryOpQuestion,'?',  43,0,  true,   null )     //  41 
+     , new RBBIRuleTableElement(doNOP, 255, 43,0,  false,   null )     //  42 
+     , new RBBIRuleTableElement(doExprCatOperator, 254, 29,0,  false,   "expr-cont")     //  43 
+     , new RBBIRuleTableElement(doNOP, 132, 43,0,  true,   null )     //  44 
+     , new RBBIRuleTableElement(doExprCatOperator, 131, 29,0,  false,   null )     //  45 
+     , new RBBIRuleTableElement(doExprCatOperator,'[',  29,0,  false,   null )     //  46 
+     , new RBBIRuleTableElement(doExprCatOperator,'(',  29,0,  false,   null )     //  47 
+     , new RBBIRuleTableElement(doExprCatOperator,'$',  29,0,  false,   null )     //  48 
+     , new RBBIRuleTableElement(doExprCatOperator,'.',  29,0,  false,   null )     //  49 
+     , new RBBIRuleTableElement(doExprCatOperator,'/',  55,0,  false,   null )     //  50 
+     , new RBBIRuleTableElement(doExprCatOperator,'{',  67,0,  true,   null )     //  51 
+     , new RBBIRuleTableElement(doExprOrOperator,'|',  29,0,  true,   null )     //  52 
+     , new RBBIRuleTableElement(doExprRParen,')',  255,0,  true,   null )     //  53 
+     , new RBBIRuleTableElement(doExprFinished, 255, 255,0,  false,   null )     //  54 
+     , new RBBIRuleTableElement(doSlash,'/',  57,0,  true,   "look-ahead")     //  55 
+     , new RBBIRuleTableElement(doNOP, 255, 103,0,  false,   null )     //  56 
+     , new RBBIRuleTableElement(doExprCatOperator, 254, 29,0,  false,   "expr-cont-no-slash")     //  57 
+     , new RBBIRuleTableElement(doNOP, 132, 43,0,  true,   null )     //  58 
+     , new RBBIRuleTableElement(doExprCatOperator, 131, 29,0,  false,   null )     //  59 
+     , new RBBIRuleTableElement(doExprCatOperator,'[',  29,0,  false,   null )     //  60 
+     , new RBBIRuleTableElement(doExprCatOperator,'(',  29,0,  false,   null )     //  61 
+     , new RBBIRuleTableElement(doExprCatOperator,'$',  29,0,  false,   null )     //  62 
+     , new RBBIRuleTableElement(doExprCatOperator,'.',  29,0,  false,   null )     //  63 
+     , new RBBIRuleTableElement(doExprOrOperator,'|',  29,0,  true,   null )     //  64 
+     , new RBBIRuleTableElement(doExprRParen,')',  255,0,  true,   null )     //  65 
+     , new RBBIRuleTableElement(doExprFinished, 255, 255,0,  false,   null )     //  66 
+     , new RBBIRuleTableElement(doNOP, 132, 67,0,  true,   "tag-open")     //  67 
+     , new RBBIRuleTableElement(doStartTagValue, 128, 70,0,  false,   null )     //  68 
+     , new RBBIRuleTableElement(doTagExpectedError, 255, 103,0,  false,   null )     //  69 
+     , new RBBIRuleTableElement(doNOP, 132, 74,0,  true,   "tag-value")     //  70 
+     , new RBBIRuleTableElement(doNOP,'}',  74,0,  false,   null )     //  71 
+     , new RBBIRuleTableElement(doTagDigit, 128, 70,0,  true,   null )     //  72 
+     , new RBBIRuleTableElement(doTagExpectedError, 255, 103,0,  false,   null )     //  73 
+     , new RBBIRuleTableElement(doNOP, 132, 74,0,  true,   "tag-close")     //  74 
+     , new RBBIRuleTableElement(doTagValue,'}',  77,0,  true,   null )     //  75 
+     , new RBBIRuleTableElement(doTagExpectedError, 255, 103,0,  false,   null )     //  76 
+     , new RBBIRuleTableElement(doExprCatOperator, 254, 29,0,  false,   "expr-cont-no-tag")     //  77 
+     , new RBBIRuleTableElement(doNOP, 132, 77,0,  true,   null )     //  78 
+     , new RBBIRuleTableElement(doExprCatOperator, 131, 29,0,  false,   null )     //  79 
+     , new RBBIRuleTableElement(doExprCatOperator,'[',  29,0,  false,   null )     //  80 
+     , new RBBIRuleTableElement(doExprCatOperator,'(',  29,0,  false,   null )     //  81 
+     , new RBBIRuleTableElement(doExprCatOperator,'$',  29,0,  false,   null )     //  82 
+     , new RBBIRuleTableElement(doExprCatOperator,'.',  29,0,  false,   null )     //  83 
+     , new RBBIRuleTableElement(doExprCatOperator,'/',  55,0,  false,   null )     //  84 
+     , new RBBIRuleTableElement(doExprOrOperator,'|',  29,0,  true,   null )     //  85 
+     , new RBBIRuleTableElement(doExprRParen,')',  255,0,  true,   null )     //  86 
+     , new RBBIRuleTableElement(doExprFinished, 255, 255,0,  false,   null )     //  87 
+     , new RBBIRuleTableElement(doStartVariableName,'$',  90,0,  true,   "scan-var-name")     //  88 
+     , new RBBIRuleTableElement(doNOP, 255, 103,0,  false,   null )     //  89 
+     , new RBBIRuleTableElement(doNOP, 130, 92,0,  true,   "scan-var-start")     //  90 
+     , new RBBIRuleTableElement(doVariableNameExpectedErr, 255, 103,0,  false,   null )     //  91 
+     , new RBBIRuleTableElement(doNOP, 129, 92,0,  true,   "scan-var-body")     //  92 
+     , new RBBIRuleTableElement(doEndVariableName, 255, 255,0,  false,   null )     //  93 
+     , new RBBIRuleTableElement(doScanUnicodeSet,'[',  255,0,  true,   "scan-unicode-set")     //  94 
+     , new RBBIRuleTableElement(doScanUnicodeSet,'p',  255,0,  true,   null )     //  95 
+     , new RBBIRuleTableElement(doScanUnicodeSet,'P',  255,0,  true,   null )     //  96 
+     , new RBBIRuleTableElement(doNOP, 255, 103,0,  false,   null )     //  97 
+     , new RBBIRuleTableElement(doNOP, 132, 98,0,  true,   "assign-or-rule")     //  98 
+     , new RBBIRuleTableElement(doStartAssign,'=',  29, 101, true,   null )     //  99 
+     , new RBBIRuleTableElement(doNOP, 255, 37, 9, false,   null )     //  100 
+     , new RBBIRuleTableElement(doEndAssign,';',  1,0,  true,   "assign-end")     //  101 
+     , new RBBIRuleTableElement(doRuleErrorAssignExpr, 255, 103,0,  false,   null )     //  102 
+     , new RBBIRuleTableElement(doExit, 255, 103,0,  true,   "errorDeath")     //  103 
  };
-} 
+}; 
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleScanner.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleScanner.java
index 31fb8e1d017..a62b0d917b1 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleScanner.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBIRuleScanner.java
@@ -1,9 +1,9 @@
 /*
  *******************************************************************************
- * Copyright (C) 2003-2011, International Business Machines Corporation and others. All Rights Reserved.
+ * Copyright (C) 2003-2016, International Business Machines Corporation and others. All Rights Reserved.
  *******************************************************************************
  */
- 
+
 package com.ibm.icu.text;
 
 import java.text.ParsePosition;
@@ -19,12 +19,12 @@ import com.ibm.icu.lang.UCharacter;
   *  There is no public API here.
   */
 class RBBIRuleScanner {
-    
+
     private final static int    kStackSize = 100;               // The size of the state stack for
     //   rules parsing.  Corresponds roughly
     //   to the depth of parentheses nesting
     //   that is allowed in the rules.
-    
+
     static class RBBIRuleChar {
         int             fChar;
         boolean         fEscaped;
@@ -33,7 +33,7 @@ class RBBIRuleScanner {
 
 
     RBBIRuleBuilder               fRB;              // The rule builder that we are part of.
-    
+
     int                       fScanIndex;        // Index of current character being processed
                                                      //   in the rule input string.
     int                       fNextIndex;        // Index of the next character, which
@@ -43,49 +43,52 @@ class RBBIRuleScanner {
     int                       fCharNum;          // Char position within the line.
     int                       fLastChar;         // Previous char, needed to count CR-LF
                                                      //   as a single line, not two.
-    
+
     RBBIRuleChar              fC = new RBBIRuleChar();    // Current char for parse state machine
                                                      //   processing.
     String                    fVarName;          // $variableName, valid when we've just
                                                      //   scanned one.
-    
-    
+
+
     short  fStack[] = new short[kStackSize];  // State stack, holds state pushes
     int                       fStackPtr;           //  and pops as specified in the state
                                                        //  transition rules.
-    
+
     RBBINode  fNodeStack[] = new RBBINode[kStackSize]; // Node stack, holds nodes created
                                                            //  during the parse of a rule
     int                        fNodeStackPtr;
-    
-    
-    boolean                          fReverseRule;     // True if the rule currently being scanned
+
+
+    boolean                    fReverseRule;         // True if the rule currently being scanned
                                                      //  is a reverse direction rule (if it
                                                      //  starts with a '!')
-    
-    boolean                          fLookAheadRule;   // True if the rule includes a '/'
+
+    boolean                    fLookAheadRule;       // True if the rule includes a '/'
                                                      //   somewhere within it.
-    
-    RBBISymbolTable              fSymbolTable;     // symbol table, holds definitions of
+
+    boolean                    fNoChainInRule;       // True if the current rule starts with a '^'.
+
+
+    RBBISymbolTable            fSymbolTable;         // symbol table, holds definitions of
                                                      //   $variable symbols.
-    
+
     HashMap<String, RBBISetTableEl> fSetTable = new HashMap<String, RBBISetTableEl>(); // UnicocodeSet hash table, holds indexes to
                                                                                        //   the sets created while parsing rules.
                                                                                        //   The key is the string used for creating
                                                                                        //   the set.
-    
+
     UnicodeSet      fRuleSets[] = new UnicodeSet[10];    // Unicode Sets that are needed during
                                                      //  the scanning of RBBI rules.  The
                                                      //  indicies for these are assigned by the
                                                      //  perl script that builds the state tables.
                                                      //  See rbbirpt.h.
-    
+
     int                        fRuleNum;         // Counts each rule as it is scanned.
-    
+
     int                        fOptionStart;     // Input index of start of a !!option
                                                  //   keyword, while being scanned.
 
-    
+
 
    static private String gRuleSet_rule_char_pattern       = "[^[\\p{Z}\\u0020-\\u007f]-[\\p{L}]-[\\p{N}]]";
    static private String gRuleSet_name_char_pattern       = "[_\\p{L}\\p{N}]";
@@ -94,8 +97,8 @@ class RBBIRuleScanner {
    static private String gRuleSet_white_space_pattern     = "[\\p{Pattern_White_Space}]";
    static private String kAny =  "any";
 
-    
- 
+
+
 
     //----------------------------------------------------------------------------------------
     //
@@ -139,6 +142,12 @@ class RBBIRuleScanner {
             fRuleNum++;
             break;
 
+        case RBBIRuleParseTable.doNoChain:
+            // Scanned a '^' while on the rule start state.
+            fNoChainInRule = true;
+            break;
+
+
         case RBBIRuleParseTable.doExprOrOperator: {
             fixOpStack(RBBINode.precOpCat);
             RBBINode operandNode = fNodeStack[fNodeStackPtr--];
@@ -241,11 +250,11 @@ class RBBIRuleScanner {
                 printNodeStack("end of rule");
             }
             Assert.assrt(fNodeStackPtr == 1);
+            RBBINode thisRule = fNodeStack[fNodeStackPtr];
 
             // If this rule includes a look-ahead '/', add a endMark node to the
             //   expression tree.
             if (fLookAheadRule) {
-                RBBINode thisRule = fNodeStack[fNodeStackPtr];
                 RBBINode endNode = pushNewNode(RBBINode.endMark);
                 RBBINode catNode = pushNewNode(RBBINode.opCat);
                 fNodeStackPtr -= 2;
@@ -254,8 +263,24 @@ class RBBIRuleScanner {
                 fNodeStack[fNodeStackPtr] = catNode;
                 endNode.fVal = fRuleNum;
                 endNode.fLookAheadEnd = true;
+                thisRule = catNode;
+
+                // TODO: Disable chaining out of look-ahead (hard break) rules.
+                //   The break on rule match is forced, so there is no point in building up
+                //   the state table to chain into another rule for a longer match.
             }
 
+            // Mark this node as being the root of a rule.
+            thisRule.fRuleRoot = true;
+
+            // Flag if chaining into this rule is wanted.
+            //
+            if (fRB.fChainRules &&          // If rule chaining is enabled globally via !!chain
+                    !fNoChainInRule) {      //     and no '^' chain-in inhibit was on this rule
+                thisRule.fChainIn = true;
+            }
+
+
             // All rule expressions are ORed together.
             // The ';' that terminates an expression really just functions as a
             // '|' with
@@ -269,12 +294,12 @@ class RBBIRuleScanner {
             int destRules = (fReverseRule ? RBBIRuleBuilder.fReverseTree : fRB.fDefaultTree);
 
             if (fRB.fTreeRoots[destRules] != null) {
-                // This is not the first rule encounted.
+                // This is not the first rule encountered.
                 // OR previous stuff (from *destRules)
                 // with the current rule expression (on the Node Stack)
                 //  with the resulting OR expression going to *destRules
                 //
-                RBBINode thisRule = fNodeStack[fNodeStackPtr];
+                thisRule = fNodeStack[fNodeStackPtr];
                 RBBINode prevRules = fRB.fTreeRoots[destRules];
                 RBBINode orNode = pushNewNode(RBBINode.opOr);
                 orNode.fLeftChild = prevRules;
@@ -289,6 +314,7 @@ class RBBIRuleScanner {
             }
             fReverseRule = false; // in preparation for the next rule.
             fLookAheadRule = false;
+            fNoChainInRule = false;
             fNodeStackPtr = 0;
         }
             break;
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java
index 73c9c4c92f0..2140d5ed4f2 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RBBITableBuilder.java
@@ -1,6 +1,6 @@
 /*
 **********************************************************************
-*   Copyright (c) 2002-2009, International Business Machines
+*   Copyright (c) 2002-2016, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 **********************************************************************
 */
@@ -28,9 +28,9 @@ import com.ibm.icu.lang.UProperty;
 //                         There is no user-visible public API here.
 //
 class RBBITableBuilder {
-    
-    
-    
+
+
+
     //
     //  RBBIStateDescriptor - The DFA is initially constructed as a set of these descriptors,
     //                        one for each state.
@@ -58,8 +58,8 @@ class RBBITableBuilder {
                                                     //   symbol.
         }
     }
-    
-    
+
+
     private  RBBIRuleBuilder  fRB;
     private  int             fRootIx;             // The array index into RBBIRuleBuilder.fTreeRoots
                                                    //   for the parse tree to operate on.
@@ -84,7 +84,7 @@ class RBBITableBuilder {
 
 
 
- 
+
        //-----------------------------------------------------------------------------
        //
        //   RBBITableBuilder::build  -  This is the main function for building the DFA state transtion
@@ -109,11 +109,11 @@ class RBBITableBuilder {
            }
 
            //
-           // If the rules contained any references to {bof} 
+           // If the rules contained any references to {bof}
            //   add a {bof} <cat> <former root of tree> to the
-           //   tree.  Means that all matches must start out with the 
+           //   tree.  Means that all matches must start out with the
            //   {bof} fake character.
-           // 
+           //
            if (fRB.fSetBuilder.sawBOF()) {
                RBBINode bofTop    = new RBBINode(RBBINode.opCat);
                RBBINode bofLeaf   = new RBBINode(RBBINode.leafChar);
@@ -361,6 +361,25 @@ class RBBITableBuilder {
            }
        }
 
+       //-----------------------------------------------------------------------------
+       //
+       //           addRuleRootNodes    Recursively walk a parse tree, appending every node whose
+       //                               fRuleRoot flag is set to the destination list, left to right.
+       //
+       //-----------------------------------------------------------------------------
+       void addRuleRootNodes(List<RBBINode> dest, RBBINode node) {
+           if (node == null) {                         // Empty subtree; nothing to collect.
+               return;
+           }
+           if (node.fRuleRoot) {
+               dest.add(node);
+               // Note: rules cannot nest. If we found a rule root node, no child node
+               //       can also be a rule root, so the subtree below it is not searched.
+               return;
+           }
+           addRuleRootNodes(dest, node.fLeftChild);    // Not a rule root: search both subtrees.
+           addRuleRootNodes(dest, node.fRightChild);
+       }
 
        //-----------------------------------------------------------------------------
        //
@@ -379,17 +398,21 @@ class RBBITableBuilder {
            // get a list all leaf nodes
            tree.findNodes(leafNodes, RBBINode.leafChar);
 
-           // Get all nodes that can be the start a match, which is FirstPosition()
-           // of the portion of the tree corresponding to user-written rules.
-           // See the tree description in bofFixup().
-           RBBINode userRuleRoot = tree;
-           if (fRB.fSetBuilder.sawBOF()) {
-               userRuleRoot = tree.fLeftChild.fRightChild;
+           // Collect all leaf nodes that can start matches for rules
+           // with inbound chaining enabled, which is the union of the
+           // firstPosition sets from each of the rule root nodes.
+
+           List<RBBINode> ruleRootNodes = new ArrayList<RBBINode>();
+           addRuleRootNodes(ruleRootNodes, tree);
+
+           Set<RBBINode> matchStartNodes = new HashSet<RBBINode>();
+           for (RBBINode node: ruleRootNodes) {
+               if (node.fChainIn) {
+                   matchStartNodes.addAll(node.fFirstPosSet);
+               }
            }
-           Assert.assrt(userRuleRoot != null);
-           Set<RBBINode> matchStartNodes = userRuleRoot.fFirstPosSet;
 
-           // Iteratate over all leaf nodes,
+           // Iterate over all leaf nodes,
            //
            for (RBBINode tNode : leafNodes) {
                RBBINode endNode = null;
@@ -461,9 +484,9 @@ class RBBITableBuilder {
            //
            //   The parse tree looks like this ...
            //         fTree root  --.       <cat>
-           //                               /     \   
+           //                               /     \
            //                            <cat>   <#end node>
-           //                           /     \   
+           //                           /     \
            //                     <bofNode>   rest
            //                               of tree
            //
@@ -477,7 +500,7 @@ class RBBITableBuilder {
            //  (excluding the fake bofNode)
            //  We want the nodes that can start a match in the
            //     part labeled "rest of tree"
-           // 
+           //
            Set<RBBINode> matchStartNodes = fRB.fTreeRoots[fRootIx].fLeftChild.fRightChild.fFirstPosSet;
            for (RBBINode startNode : matchStartNodes) {
                if (startNode.fType != RBBINode.leafChar) {
@@ -489,7 +512,7 @@ class RBBITableBuilder {
                    //    explicitly written into a rule.
                    //  Add everything from the followPos set of this node to the
                    //    followPos set of the fake bofNode at the start of the tree.
-                   //  
+                   //
                    bofNode.fFollowPos.addAll(startNode.fFollowPos);
                }
            }
@@ -705,7 +728,7 @@ class RBBITableBuilder {
        //      The RBBI runtime uses an array of {sets of status values} that can
        //      be returned for boundaries.  Each accepting state that has non-zero
        //      status includes an index into this array.  The format of the array
-       //      is 
+       //      is
        //           Num of status values in group 1
        //              status val
        //              status val
@@ -718,7 +741,7 @@ class RBBITableBuilder {
        //
        //
        //-----------------------------------------------------------------------------
-       
+
        void  mergeRuleStatusVals() {
            //
            //  The basic outline of what happens here is this...
@@ -731,14 +754,14 @@ class RBBITableBuilder {
            //           add the tag list for this state to the global list.
            //
            int n;
-           
+
            // Pre-load a single tag of {0} into the table.
            //   We will need this as a default, for rule sets with no explicit tagging,
            //   or with explicit tagging of {0}.
            if (fRB.fRuleStatusVals.size() == 0) {
                fRB.fRuleStatusVals.add(Integer.valueOf(1));    // Num of statuses in group
                fRB.fRuleStatusVals.add(Integer.valueOf(0));    //   and our single status of zero
-               
+
                SortedSet<Integer> s0 = new TreeSet<Integer>();
                Integer izero = Integer.valueOf(0);
                fRB.fStatusSets.put(s0, izero);
@@ -756,17 +779,17 @@ class RBBITableBuilder {
                if (arrayIndexI == null) {
                    // This is the first encounter of this set of status values.
                    //   Add them to the statusSets map, This map associates
-                   //   the set of status values with an index in the runtime status 
+                   //   the set of status values with an index in the runtime status
                    //   values array.
                    arrayIndexI = Integer.valueOf(fRB.fRuleStatusVals.size());
                    fRB.fStatusSets.put(statusVals, arrayIndexI);
-                   
+
                    // Add the new set of status values to the vector of values that
                    //   will eventually become the array used by the runtime engine.
                    fRB.fRuleStatusVals.add(Integer.valueOf(statusVals.size()));
                    fRB.fRuleStatusVals.addAll(statusVals);
                }
-               
+
                // Save the runtime array index back into the state descriptor.
                sd.fTagsIdx = arrayIndexI.intValue();
            }
@@ -784,7 +807,7 @@ class RBBITableBuilder {
        //                 for each node in the tree.
        //
        //-----------------------------------------------------------------------------
-       
+
        void printPosSets(RBBINode n) {
            if (n==null) {
                return;
@@ -804,7 +827,7 @@ class RBBITableBuilder {
            printPosSets(n.fLeftChild);
            printPosSets(n.fRightChild);
        }
-       
+
 
 
 
@@ -860,7 +883,7 @@ class RBBITableBuilder {
        //                    See struct RBBIStateTable in ICU4C, common/rbbidata.h
        //
        //-----------------------------------------------------------------------------
-       
+
        short [] exportTable() {
            int                state;
            int                col;
@@ -870,18 +893,18 @@ class RBBITableBuilder {
            }
 
            Assert.assrt(fRB.fSetBuilder.getNumCharCategories() < 0x7fff &&
-               fDStates.size() < 0x7fff); 
+               fDStates.size() < 0x7fff);
 
            int numStates = fDStates.size();
-    
+
            // Size of table size in shorts.
            //  the "4" is the size of struct RBBIStateTableRow, the row header part only.
            int rowLen = 4 + fRB.fSetBuilder.getNumCharCategories();
            int tableSize = getTableSize() / 2;
 
-           
+
            short [] table = new short[tableSize];
-           
+
            //
            // Fill in the header fields.
            //      Annoying because they really want to be ints, not shorts.
@@ -893,7 +916,7 @@ class RBBITableBuilder {
            // RBBIStateTable.fRowLen
            table[RBBIDataWrapper.ROWLEN]   = (short)(rowLen >>> 16);
            table[RBBIDataWrapper.ROWLEN+1] = (short)(rowLen & 0x0000ffff);
-           
+
            // RBBIStateTable.fFlags
            int flags = 0;
            if (fRB.fLookAheadHardBreak) {
@@ -904,7 +927,7 @@ class RBBITableBuilder {
            }
            table[RBBIDataWrapper.FLAGS]   = (short)(flags >>> 16);
            table[RBBIDataWrapper.FLAGS+1] = (short)(flags & 0x0000ffff);
-           
+
            int numCharCategories = fRB.fSetBuilder.getNumCharCategories();
            for (state=0; state<numStates; state++) {
                RBBIStateDescriptor sd = fDStates.get(state);
@@ -928,14 +951,14 @@ class RBBITableBuilder {
        //   printSet    Debug function.   Print the contents of a set of Nodes
        //
        //-----------------------------------------------------------------------------
-       
+
        void printSet(Collection<RBBINode> s) {
            for (RBBINode n : s) {
                RBBINode.printInt(n.fSerialNum, 8);
            }
            System.out.println();
        }
-       
+
 
 
        //-----------------------------------------------------------------------------
@@ -943,7 +966,7 @@ class RBBITableBuilder {
        //   printStates    Debug Function.  Dump the fully constructed state transition table.
        //
        //-----------------------------------------------------------------------------
-       
+
        void printStates() {
            int     c;    // input "character"
            int     n;    // state number
@@ -964,7 +987,7 @@ class RBBITableBuilder {
                RBBIStateDescriptor sd = fDStates.get(n);
                RBBINode.printInt(n, 5);
                System.out.print(" | ");
-               
+
                RBBINode.printInt(sd.fAccepting, 3);
                RBBINode.printInt(sd.fLookAhead, 4);
                RBBINode.printInt(sd.fTagsIdx, 6);
@@ -976,7 +999,7 @@ class RBBITableBuilder {
            }
            System.out.print("\n\n");
        }
-       
+
 
 
 
@@ -985,7 +1008,7 @@ class RBBITableBuilder {
        //   printRuleStatusTable    Debug Function.  Dump the common rule status table
        //
        //-----------------------------------------------------------------------------
-       
+
        void printRuleStatusTable() {
            int  thisRecord = 0;
            int  nextRecord = 0;
@@ -1007,7 +1030,7 @@ class RBBITableBuilder {
            }
            System.out.print("\n\n");
        }
-       
+
 
 
 }
diff --git a/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java b/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java
index 644d788828f..7d5f4611ef8 100644
--- a/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java
+++ b/icu4j/main/classes/core/src/com/ibm/icu/text/RuleBasedBreakIterator.java
@@ -30,17 +30,17 @@ import com.ibm.icu.lang.UProperty;
 import com.ibm.icu.lang.UScript;
 
 /**
- * Rule Based Break Iterator 
+ * Rule Based Break Iterator
  * This is a port of the C++ class RuleBasedBreakIterator from ICU4C.
- * 
+ *
  * @stable ICU 2.0
  */
 public class RuleBasedBreakIterator extends BreakIterator {
     //=======================================================================
     // Constructors & Factories
     //=======================================================================
-    
-    /** 
+
+    /**
      * private constructor
      */
     private RuleBasedBreakIterator() {
@@ -51,14 +51,14 @@ public class RuleBasedBreakIterator extends BreakIterator {
 
     /**
      * Create a break iterator from a precompiled set of break rules.
-     * 
+     *
      * Creating a break iterator from the binary rules is much faster than
-     * creating one from source rules. 
-     * 
+     * creating one from source rules.
+     *
      * The binary rules are generated by the RuleBasedBreakIterator.compileRules() function.
      * Binary break iterator rules are not guaranteed to be compatible between
      * different versions of ICU.
-     * 
+     *
      * @param is an input stream supplying the compiled binary rules.
      * @throws IOException if there is an error while reading the rules from the InputStream.
      * @see    #compileRules(String, OutputStream)
@@ -67,7 +67,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
     public static RuleBasedBreakIterator getInstanceFromCompiledRules(InputStream is) throws IOException {
         RuleBasedBreakIterator  This = new RuleBasedBreakIterator();
         This.fRData = RBBIDataWrapper.get(ICUBinary.getByteBufferFromInputStreamAndCloseStream(is));
-        return This;   
+        return This;
     }
 
     /**
@@ -129,7 +129,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
     {
         RuleBasedBreakIterator result = (RuleBasedBreakIterator)super.clone();
         if (fText != null) {
-            result.fText = (CharacterIterator)(fText.clone());   
+            result.fText = (CharacterIterator)(fText.clone());
         }
         return result;
     }
@@ -151,15 +151,15 @@ public class RuleBasedBreakIterator extends BreakIterator {
             if (fRData != other.fRData && (fRData == null || other.fRData == null)) {
                 return false;
             }
-            if (fRData != null && other.fRData != null && 
+            if (fRData != null && other.fRData != null &&
                     (!fRData.fRuleSource.equals(other.fRData.fRuleSource))) {
                 return false;
             }
             if (fText == null && other.fText == null) {
-                return true;   
+                return true;
             }
             if (fText == null || other.fText == null) {
-                return false;   
+                return false;
             }
             return fText.equals(other.fText);
         }
@@ -188,13 +188,13 @@ public class RuleBasedBreakIterator extends BreakIterator {
      */
     public int hashCode()
     {
-        return fRData.fRuleSource.hashCode(); 
+        return fRData.fRuleSource.hashCode();
     }
 
 
     private static final int  START_STATE = 1;     // The state number of the starting state
     private static final int  STOP_STATE  = 0;     // The state-transition value indicating "stop"
-    
+
     // RBBIRunMode - the state machine runs an extra iteration at the beginning and end
     //               of user text.  A variable with this enum type keeps track of where we
     //               are.  The state machine only fetches user text input while in RUN mode.
@@ -206,14 +206,14 @@ public class RuleBasedBreakIterator extends BreakIterator {
      * The character iterator through which this BreakIterator accesses the text.
      */
     private CharacterIterator   fText = new java.text.StringCharacterIterator("");
-    
+
     /**
      * The rule data for this BreakIterator instance. Package private.
      */
     RBBIDataWrapper             fRData;
-    
+
     /*
-     * Index of the Rule {tag} values for the most recent match. 
+     * Index of the Rule {tag} values for the most recent match.
      */
     private int                 fLastRuleStatusIndex;
 
@@ -245,18 +245,18 @@ public class RuleBasedBreakIterator extends BreakIterator {
             && ICUDebug.value(RBBI_DEBUG_ARG).indexOf("trace") >= 0;
 
     /**
-     * What kind of break iterator this is. Set to KIND_LINE by default, 
+     * What kind of break iterator this is. Set to KIND_LINE by default,
      * since this produces sensible output.
      */
     private int fBreakType = KIND_LINE;
-    
+
     /**
      * The "default" break engine - just skips over ranges of dictionary words,
      * producing no breaks. Should only be used if characters need to be handled
      * by a dictionary but we have no dictionary implementation for them.
      */
     private final UnhandledBreakEngine fUnhandledBreakEngine = new UnhandledBreakEngine();
-    
+
     /**
      * when a range of characters is divided up using the dictionary, the break
      * positions that are discovered are stored here, preventing us from having
@@ -271,8 +271,8 @@ public class RuleBasedBreakIterator extends BreakIterator {
      */
     private int fPositionInCache;
 
-    
-    private final ConcurrentHashMap<Integer, LanguageBreakEngine> fBreakEngines = 
+
+    private final ConcurrentHashMap<Integer, LanguageBreakEngine> fBreakEngines =
             new ConcurrentHashMap<Integer, LanguageBreakEngine>();
     /**
      * Dumps caches and performs other actions associated with a complete change
@@ -293,18 +293,18 @@ public class RuleBasedBreakIterator extends BreakIterator {
      */
     @Deprecated
     public void dump() {
-        this.fRData.dump();   
+        this.fRData.dump();
     }
 
     /**
      * Compile a set of source break rules into the binary state tables used
      * by the break iterator engine.  Creating a break iterator from precompiled
      * rules is much faster than creating one from source rules.
-     * 
+     *
      * Binary break rules are not guaranteed to be compatible between different
      * versions of ICU.
-     * 
-     * 
+     *
+     *
      * @param rules  The source form of the break rules
      * @param ruleBinary  An output stream to receive the compiled rules.
      * @throws IOException If there is an error writing the output.
@@ -314,7 +314,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
     public static void compileRules(String rules, OutputStream ruleBinary) throws IOException {
         RBBIRuleBuilder.compileRules(rules, ruleBinary);
     }
-    
+
     //=======================================================================
     // BreakIterator overrides
     //=======================================================================
@@ -337,7 +337,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
         fText.first();
         return fText.getIndex();
     }
-    
+
     /**
      * Sets the current iteration position to the end of the text.
      * (i.e., the CharacterIterator's ending offset).
@@ -364,7 +364,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
         fText.setIndex(pos);
         return pos;
     }
-    
+
     /**
      * Advances the iterator either forward or backward the specified number of steps.
      * Negative values move backward, and positive values move forward.  This is
@@ -387,7 +387,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
         }
         return result;
     }
-    
+
     /**
      * Advances the iterator to the next boundary position.
      * @return The position of the first boundary after this one.
@@ -424,11 +424,11 @@ public class RuleBasedBreakIterator extends BreakIterator {
       *                       process.
       */
     private int checkDictionary(int startPos, int endPos, boolean reverse) {
-        
+
         // Reset the old break cache first.
         reset();
 
-        // note: code segment below assumes that dictionary chars are in the 
+        // note: code segment below assumes that dictionary chars are in the
         // startPos-endPos range
         // value returned should be next character in sequence
         if ((endPos - startPos) <= 1) {
@@ -465,7 +465,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
                     c = CharacterIteration.current32(fText);
                     category = (short)fRData.fTrie.getCodePointValue(c);
                 } while (c != CharacterIteration.DONE32 && ((category & 0x4000)) != 0);
-                
+
                 // Back up to the last dictionary character
                 rangeEnd = fText.getIndex();
                 if (c == CharacterIteration.DONE32) {
@@ -497,7 +497,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
             category = (short)fRData.fTrie.getCodePointValue(c);
         }
 
-        
+
         // Loop through the text, looking for ranges of dictionary characters.
         // For each span, find the appropriate break engine, and ask it to find
         // any breaks within the span.
@@ -518,11 +518,11 @@ public class RuleBasedBreakIterator extends BreakIterator {
             if (current >= rangeEnd) {
                 break;
             }
-            
+
             // We now have a dictionary character. Get the appropriate language object
             // to deal with it.
             lbe = getLanguageBreakEngine(c);
-            
+
             // Ask the language object if there are any breaks. It will leave the text
             // pointer on the other side of its range, ready to search for the next one.
             if (lbe != null) {
@@ -530,12 +530,12 @@ public class RuleBasedBreakIterator extends BreakIterator {
                 foundBreakCount += lbe.findBreaks(fText, rangeStart, rangeEnd, false, fBreakType, breaks);
                 assert fText.getIndex() > startingIdx;
             }
-            
+
             // Reload the loop variables for the next go-round
             c = CharacterIteration.current32(fText);
             category = (short)fRData.fTrie.getCodePointValue(c);
         }
-        
+
         // If we found breaks, build a new break cache. The first and last entries must
         // be the original starting and ending position.
         if (foundBreakCount > 0) {
@@ -549,15 +549,15 @@ public class RuleBasedBreakIterator extends BreakIterator {
             if (endPos > breaks.peek()) {
                 breaks.push(endPos);
             }
-            
+
             // TODO: get rid of this array, use results from the deque directly
             fCachedBreakPositions = new int[breaks.size()];
-            
+
             int i = 0;
             while (breaks.size() > 0) {
                 fCachedBreakPositions[i++] = breaks.pollLast();
             }
-            
+
             // If there are breaks, then by definition, we are replacing the original
             // proposed break by one of the breaks we found. Use following() and
             // preceding() to do the work. They should never recurse in this case.
@@ -573,10 +573,10 @@ public class RuleBasedBreakIterator extends BreakIterator {
         // to the original proposed break.
         fText.setIndex(reverse ? startPos : endPos);
         return (reverse ? startPos : endPos);
- 
+
         }
-    
-    
+
+
     /**
      * Moves the iterator backwards, to the last boundary preceding this one.
      * @return The position of the last boundary position preceding this one.
@@ -585,7 +585,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
     public int previous() {
         int result;
         int startPos;
-        
+
         CharacterIterator text = getText();
 
         fLastStatusIndexValid = false;
@@ -705,7 +705,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
             return text.getIndex();
         }
     }
-    
+
     private int rulesFollowing(int offset) {
         // if the offset passed in is already past the end of the text,
         // just return DONE; if it's before the beginning, return the
@@ -744,7 +744,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
         }
         if (fRData.fSFTable != null) {
             // No Safe point reverse table, but there is a safe pt forward table.
-            // 
+            //
             fText.setIndex(offset);
             previous32(fText);
             // handle next will give result >= offset
@@ -820,7 +820,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
             return text.getIndex();
         }
     }
-    
+
     private int rulesPreceding(int offset) {
         // if the offset passed in is already past the end of the text,
         // just return DONE; if it's before the beginning, return the
@@ -1002,7 +1002,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
     }
 
     /**
-     * Get the status (tag) values from the break rule(s) that determined the most 
+     * Get the status (tag) values from the break rule(s) that determined the most
      * recently returned break position.  The values appear in the rule source
      * within brackets, {123}, for example.  The default status value for rules
      * that do not explicitly provide one is zero.
@@ -1014,8 +1014,8 @@ public class RuleBasedBreakIterator extends BreakIterator {
      *  the output will be truncated to the available length.  No exception
      *  will be thrown.
      *
-     * @param fillInArray an array to be filled in with the status values.  
-     * @return          The number of rule status values from rules that determined 
+     * @param fillInArray an array to be filled in with the status values.
+     * @return          The number of rule status values from rules that determined
      *                  the most recent boundary returned by the break iterator.
      *                  In the event that the array is too small, the return value
      *                  is the total number of status values that were available,
@@ -1026,7 +1026,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
     public int getRuleStatusVec(int[] fillInArray) {
         makeRuleStatusValid();
         int numStatusVals = fRData.fStatusTable[fLastRuleStatusIndex];
-        if (fillInArray != null) {  
+        if (fillInArray != null) {
             int numToCopy = Math.min(numStatusVals, fillInArray.length);
             for (int i=0; i<numToCopy; i++) {
                 fillInArray[i] = fRData.fStatusTable[fLastRuleStatusIndex + i + 1];
@@ -1079,8 +1079,8 @@ public class RuleBasedBreakIterator extends BreakIterator {
      */
     static final String fDebugEnv = ICUDebug.enabled(RBBI_DEBUG_ARG) ?
                                         ICUDebug.value(RBBI_DEBUG_ARG) : null;
-    
-    
+
+
     private LanguageBreakEngine getLanguageBreakEngine(int c) {
 
         // We have a dictionary character.
@@ -1098,7 +1098,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
             // Fold them together for mapping from script -> engine.
             script = UScript.HAN;
         }
-        
+
         LanguageBreakEngine eng = fBreakEngines.get(script);
         /*
         if (eng != null && !eng.handles(c, fBreakType)) {
@@ -1158,15 +1158,60 @@ public class RuleBasedBreakIterator extends BreakIterator {
         return eng;
     }
 
-   
+    private static final int kMaxLookaheads = 8;   // capacity of each LookAheadResults table
+    private static class LookAheadResults {   // small fixed-size key -> text position table, searched linearly
+        int      fUsedSlotLimit;   // number of slots in use; only indexes [0, fUsedSlotLimit) hold valid entries
+        int[]    fPositions;       // fPositions[i] is the position recorded for fKeys[i]
+        int[]    fKeys;            // keys; handleNext()/handlePrevious() use the state table's look-ahead rule value
+
+        LookAheadResults() {   // starts empty, with both arrays sized to kMaxLookaheads
+            fUsedSlotLimit= 0;
+            fPositions = new int[kMaxLookaheads];
+            fKeys = new int[kMaxLookaheads];
+        }
+
+        int getPosition(int key) {   // returns the position stored for key, or -1 if key was never recorded
+            for (int i=0; i<fUsedSlotLimit; ++i) {
+                if (fKeys[i] == key) {
+                    return fPositions[i];
+                }
+            }
+            assert(false);   // key not found: fails when assertions are enabled (-ea) ...
+            return -1;       // ... otherwise reports "no position"; callers test for result >= 0
+        }
+
+        void setPosition(int key, int position) {   // overwrites the entry for key, or appends a new one
+            int i;
+            for (i=0; i<fUsedSlotLimit; ++i) {
+                if (fKeys[i] == key) {
+                    fPositions[i] = position;
+                    return;
+                }
+            }
+            if (i >= kMaxLookaheads) {   // table full: i == fUsedSlotLimit == kMaxLookaheads here
+                assert(false);
+                i = kMaxLookaheads - 1;   // with assertions disabled, the last slot is silently overwritten
+            }
+            fKeys[i] = key;
+            fPositions[i] = position;
+            assert(fUsedSlotLimit == i);   // holds on the normal append path (new entry goes in the first free slot)
+            fUsedSlotLimit = i + 1;
+        }
+
+        void reset() {   // forgets all entries; the arrays are kept and not cleared
+            fUsedSlotLimit = 0;
+        }
+    };
+    private LookAheadResults fLookAheadMatches = new LookAheadResults();   // reset at the start of handleNext() and handlePrevious()
+
 
     /**
      * The State Machine Engine for moving forward is here.
      * This function is the heart of the RBBI run time engine.
-     * 
+     *
      * @param stateTable
      * @return the new iterator position
-     * 
+     *
      * A note on supplementary characters and the position of underlying
      * Java CharacterIterator:   Normally, a character iterator is positioned at
      * the char most recently returned by next().  Within this function, when
@@ -1201,7 +1246,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
 
         // Set the initial state for the state machine
         int state           = START_STATE;
-        int row             = fRData.getRowIndex(state); 
+        int row             = fRData.getRowIndex(state);
         short category      = 3;
         int flagsState      = fRData.getStateTableFlags(stateTable);
         int mode            = RBBI_RUN;
@@ -1209,14 +1254,12 @@ public class RuleBasedBreakIterator extends BreakIterator {
             category = 2;
             mode     = RBBI_START;
             if (TRACE) {
-                System.out.print("            " +  RBBIDataWrapper.intToString(text.getIndex(), 5)); 
+                System.out.print("            " +  RBBIDataWrapper.intToString(text.getIndex(), 5));
                 System.out.print(RBBIDataWrapper.intToHexString(c, 10));
                 System.out.println(RBBIDataWrapper.intToString(state,7) + RBBIDataWrapper.intToString(category,6));
             }
         }
-        int lookaheadStatus = 0;
-        int lookaheadTagIdx = 0;
-        int lookaheadResult = 0;
+        fLookAheadMatches.reset();
 
         // loop until we reach the end of the text or transition to state 0
         while (state != STOP_STATE) {
@@ -1226,16 +1269,6 @@ public class RuleBasedBreakIterator extends BreakIterator {
                     // We have already run the loop one last time with the
                     // character set to the pseudo {eof} value. Now it is time
                     // to unconditionally bail out.
-
-                    if (lookaheadResult > result) {
-                        // We ran off the end of the string with a pending
-                        // look-ahead match.
-                        // Treat this as if the look-ahead condition had been
-                        // met, and return
-                        // the match at the / position from the look-ahead rule.
-                        result = lookaheadResult;
-                        fLastRuleStatusIndex = lookaheadTagIdx;
-                    }
                     break;
                 }
                 // Run the loop one last time with the fake end-of-input character category
@@ -1252,7 +1285,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
                 // which column in the state table to look at.
                 //
                 category = (short) trie.getCodePointValue(c);
-                
+
                 // Check the dictionary bit in the character's category.
                 //    Counter is only used by dictionary based iterators (subclasses).
                 //    Chars that need to be handled by a dictionary have a flag bit set
@@ -1265,15 +1298,15 @@ public class RuleBasedBreakIterator extends BreakIterator {
                 }
 
                 if (TRACE) {
-                    System.out.print("            " +  RBBIDataWrapper.intToString(text.getIndex(), 5)); 
+                    System.out.print("            " +  RBBIDataWrapper.intToString(text.getIndex(), 5));
                     System.out.print(RBBIDataWrapper.intToHexString(c, 10));
                     System.out.println(RBBIDataWrapper.intToString(state,7) + RBBIDataWrapper.intToString(category,6));
                 }
 
-                // Advance to the next character.  
+                // Advance to the next character.
                 // If this is a beginning-of-input loop iteration, don't advance.
                 //    The next iteration will be processing the first real input character.
-                c = (int)text.next(); 
+                c = (int)text.next();
                 if (c >= UTF16.LEAD_SURROGATE_MIN_VALUE) {
                     c = nextTrail32(text, c);
                 }
@@ -1284,7 +1317,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
 
             // look up a state transition in the state table
             state = stateTable[row + RBBIDataWrapper.NEXTSTATES + category];
-            row   = fRData.getRowIndex(state);  
+            row   = fRData.getRowIndex(state);
 
             if (stateTable[row + RBBIDataWrapper.ACCEPTING] == -1) {
                 // Match found, common case
@@ -1299,40 +1332,30 @@ public class RuleBasedBreakIterator extends BreakIterator {
                 fLastRuleStatusIndex = stateTable[row + RBBIDataWrapper.TAGIDX];
             }
 
-            if (stateTable[row + RBBIDataWrapper.LOOKAHEAD] != 0) {
-                if (lookaheadStatus != 0
-                    && stateTable[row + RBBIDataWrapper.ACCEPTING] == lookaheadStatus) {
-                    // Lookahead match is completed.  Set the result accordingly, but only
-                    // if no other rule has matched further in the mean time.
-                    result               = lookaheadResult;
-                    fLastRuleStatusIndex = lookaheadTagIdx;
-                    lookaheadStatus      = 0;
-                    // TODO: make a standalone hard break in a rule work.
-                    if ((flagsState & RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK) != 0) {
-                        text.setIndex(result);
-                        return result;
-                    }
-                    // Look-ahead completed, but other rules may match further.  Continue on.
-                    //   TODO:  junk this feature?  I don't think it's used anywhere.
-                    continue;
+            int completedRule = stateTable[row + RBBIDataWrapper.ACCEPTING];
+            if (completedRule > 0) {
+                // Lookahead match is completed
+                int lookaheadResult = fLookAheadMatches.getPosition(completedRule);
+                if (lookaheadResult >= 0) {
+                    fLastRuleStatusIndex = stateTable[row + RBBIDataWrapper.TAGIDX];
+                    text.setIndex(lookaheadResult);
+                    return lookaheadResult;
                 }
+            }
 
-                lookaheadResult = text.getIndex();
+            int rule =  stateTable[row + RBBIDataWrapper.LOOKAHEAD];
+            if (rule != 0) {
+                // At the position of a '/' in a look-ahead match. Record it.
+                int  pos = text.getIndex();
                 if (c >= UTF16.SUPPLEMENTARY_MIN_VALUE && c <= UTF16.CODEPOINT_MAX_VALUE) {
                     // The iterator has been left in the middle of a surrogate pair.
                     // We want the beginning  of it.
-                    lookaheadResult--;
+                    pos--;
                 }
-                lookaheadStatus = stateTable[row + RBBIDataWrapper.LOOKAHEAD];
-                lookaheadTagIdx = stateTable[row + RBBIDataWrapper.TAGIDX];
-                continue;
+                fLookAheadMatches.setPosition(rule, pos);
             }
 
-            if (stateTable[row + RBBIDataWrapper.ACCEPTING] != 0) {
-                // Because this is an accepting state, any in-progress look-ahead match
-                //   is no longer relevant.  Clear out the pending lookahead status.
-                lookaheadStatus = 0; 
-            }
+
         }        // End of state machine main loop
 
         // The state machine is done.  Check whether it found a match...
@@ -1340,7 +1363,7 @@ public class RuleBasedBreakIterator extends BreakIterator {
         // If the iterator failed to advance in the match engine force it ahead by one.
         // This indicates a defect in the break rules, which should always match
         // at least one character.
-        
+
         if (result == initialPosition) {
             if (TRACE) {
                 System.out.println("Iterator did not move. Advancing by 1.");
@@ -1365,31 +1388,28 @@ public class RuleBasedBreakIterator extends BreakIterator {
         if (fText == null || stateTable == null) {
             return 0;
         }
-        
+
         int            state;
         int            category           = 0;
         int            mode;
-        int            row;        
+        int            row;
         int            c;
-        int            lookaheadStatus    = 0;
         int            result             = 0;
         int            initialPosition    = 0;
-        int            lookaheadResult    = 0;
-        boolean        lookAheadHardBreak = 
-            (fRData.getStateTableFlags(stateTable) & RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK) != 0;
-        
+        fLookAheadMatches.reset();
+
         // handlePrevious() never gets the rule status.
         // Flag the status as invalid; if the user ever asks for status, we will need
         // to back up, then re-find the break position using handleNext(), which does
         // get the status value.
         fLastStatusIndexValid = false;
         fLastRuleStatusIndex  = 0;
-        
+
         // set up the starting char
         initialPosition = fText.getIndex();
         result          = initialPosition;
         c               = previous32(fText);
-        
+
         // Set up the initial state for the state machine
         state = START_STATE;
         row = fRData.getRowIndex(state);
@@ -1399,129 +1419,95 @@ public class RuleBasedBreakIterator extends BreakIterator {
             category = 2;
             mode     = RBBI_START;
         }
-        
+
         if (TRACE) {
             System.out.println("Handle Prev   pos   char  state category ");
         }
-        
+
         // loop until we reach the beginning of the text or transition to state 0
         //
         mainLoop: for (;;) {
-            innerBlock: {
-                if (c == DONE32) {
-                    // Reached end of input string.
-                    if (mode == RBBI_END || fRData.fHeader.fVersion == 1) {
-                        // Either this is the old (ICU 3.2 and earlier) format data which
-                        // does not support explicit support for matching {eof}, or
-                        // we have already done the {eof} iteration.  Now is the time
-                        // to unconditionally bail out.
-                        if (lookaheadResult < result) {
-                            // We ran off the end of the string with a pending look-ahead match.
-                            // Treat this as if the look-ahead condition had been met, and return
-                            //  the match at the / position from the look-ahead rule.
-                            result = lookaheadResult;
-                            lookaheadStatus = 0;
-                        } else if (result == initialPosition) {
-                            // Ran off start, no match found.
-                            // Move one position (towards the start, since we are doing previous.)
-                            fText.setIndex(initialPosition);
-                            previous32(fText);
-                        }
-                        break mainLoop;
-                    }
-                    mode = RBBI_END;
-                    category = 1;
-                }
-                
-                if (mode == RBBI_RUN) {
-                    // look up the current character's category, which tells us
-                    // which column in the state table to look at.
-                    //
-                    category = (short) fRData.fTrie.getCodePointValue(c);
-                    
-                    // Check the dictionary bit in the character's category.
-                    //    Counter is only used by dictionary based iterators (subclasses).
-                    //    Chars that need to be handled by a dictionary have a flag bit set
-                    //    in their category values.
-                    //
-                    if ((category & 0x4000) != 0)  {
-                        fDictionaryCharCount++;
-                        //  And off the dictionary flag bit.
-                        category &= ~0x4000;
-                    }
-                }
-                
-                
-                if (TRACE) {
-                    System.out.print("             " + fText.getIndex() + "   ");
-                    if (0x20 <= c && c < 0x7f) {
-                        System.out.print("  " + c + "  ");
-                    } else {
-                        System.out.print(" " + Integer.toHexString(c) + " ");
+            if (c == DONE32) {
+                // Reached end of input string.
+                if (mode == RBBI_END || fRData.fHeader.fVersion == 1) {
+                    // Either this is the old (ICU 3.2 and earlier) format data which
+                    // does not provide explicit support for matching {eof}, or
+                    // we have already done the {eof} iteration.  Now is the time
+                    // to unconditionally bail out.
+                    if (result == initialPosition) {
+                        // Ran off start, no match found.
+                        // Move one position (towards the start, since we are doing previous.)
+                        fText.setIndex(initialPosition);
+                        previous32(fText);
                     }
-                    System.out.println(" " + state + "  " + category + " ");
+                    break mainLoop;
                 }
-                
-                // State Transition - move machine to its next state
+                mode = RBBI_END;
+                category = 1;
+            }
+
+            if (mode == RBBI_RUN) {
+                // look up the current character's category, which tells us
+                // which column in the state table to look at.
+                //
+                category = (short) fRData.fTrie.getCodePointValue(c);
+
+                // Check the dictionary bit in the character's category.
+                //    Counter is only used by dictionary based iterators (subclasses).
+                //    Chars that need to be handled by a dictionary have a flag bit set
+                //    in their category values.
                 //
-                state = stateTable[row + RBBIDataWrapper.NEXTSTATES + category];
-                row = fRData.getRowIndex(state);
-                
-                if (stateTable[row + RBBIDataWrapper.ACCEPTING] == -1) {
-                    // Match found, common case, could have lookahead so we move
-                    // on to check it
-                    result = fText.getIndex();
+                if ((category & 0x4000) != 0)  {
+                    fDictionaryCharCount++;
+                    //  And off the dictionary flag bit.
+                    category &= ~0x4000;
                 }
-                
-                if (stateTable[row + RBBIDataWrapper.LOOKAHEAD] != 0) {
-                    if (lookaheadStatus != 0
-                            && stateTable[row + RBBIDataWrapper.ACCEPTING] == lookaheadStatus) {
-                        // Lookahead match is completed. Set the result
-                        // accordingly, but only
-                        // if no other rule has matched further in the mean
-                        // time.
-                        result = lookaheadResult;
-                        lookaheadStatus = 0;
-                        // TODO: make a stand-alone hard break in a rule work.
-                        
-                        if (lookAheadHardBreak) {
-                            break mainLoop;
-                        }
-                        // Look-ahead completed, but other rules may match further.
-                        // Continue on.
-                        // TODO: junk this feature?  I don't think that it's used anywhere.
-                        break innerBlock;
-                    }
-                    // Hit a possible look-ahead match. We are at the
-                    // position of the '/'. Remember this position.
-                    lookaheadResult = fText.getIndex();
-                    lookaheadStatus = stateTable[row + RBBIDataWrapper.LOOKAHEAD];
-                    break innerBlock;
-                } 
-                
-                // not lookahead...
-                if (stateTable[row + RBBIDataWrapper.ACCEPTING] != 0) {
-                    // This is a plain (non-look-ahead) accepting state.
-                    if (!lookAheadHardBreak) {
-                        // Clear out any pending look-ahead matches,
-                        // but only if not doing the lookAheadHardBreak option
-                        // which needs to force a break no matter what is going
-                        // on with the rest of the match, i.e. we can't abandon
-                        // a partially completed look-ahead match because
-                        // some other rule matched further than the '/' position
-                        // in the look-ahead match.
-                        lookaheadStatus = 0; 
-                    }
+            }
+
+
+            if (TRACE) {
+                System.out.print("             " + fText.getIndex() + "   ");
+                if (0x20 <= c && c < 0x7f) {
+                    System.out.print("  " + c + "  ");
+                } else {
+                    System.out.print(" " + Integer.toHexString(c) + " ");
+                }
+                System.out.println(" " + state + "  " + category + " ");
+            }
+
+            // State Transition - move machine to its next state
+            //
+            state = stateTable[row + RBBIDataWrapper.NEXTSTATES + category];
+            row = fRData.getRowIndex(state);
+
+            if (stateTable[row + RBBIDataWrapper.ACCEPTING] == -1) {
+                // Match found, common case, could have lookahead so we move
+                // on to check it
+                result = fText.getIndex();
+            }
+
+
+            int completedRule = stateTable[row + RBBIDataWrapper.ACCEPTING];
+            if (completedRule > 0) {
+                // Lookahead match is completed.
+                int lookaheadResult = fLookAheadMatches.getPosition(completedRule);
+                if (lookaheadResult >= 0) {
+                    result = lookaheadResult;
+                    break mainLoop;
                 }
-                
-            } // end of innerBlock.  "break innerBlock" in above code comes out here.
-        
-        
+            }
+            int rule = stateTable[row + RBBIDataWrapper.LOOKAHEAD];
+            if (rule != 0) {
+                // At the position of a '/' in a look-ahead match. Record it.
+                int pos = fText.getIndex();
+                fLookAheadMatches.setPosition(rule, pos);
+            }
+
             if (state == STOP_STATE) {
                 // Normal loop exit is here
                 break mainLoop;
             }
-        
+
             // then move iterator position backwards one character
             //
             if (mode == RBBI_RUN) {
@@ -1531,10 +1517,10 @@ public class RuleBasedBreakIterator extends BreakIterator {
                     mode = RBBI_RUN;
                 }
             }
-        
-        
+
+
         }   // End of the main loop.
-        
+
         // The state machine is done.  Check whether it found a match...
         //
         // If the iterator failed to advance in the match engine, force it ahead by one.
@@ -1545,12 +1531,12 @@ public class RuleBasedBreakIterator extends BreakIterator {
             previous32(fText);
             result = fText.getIndex();
         }
-        
+
         fText.setIndex(result);
         if (TRACE) {
             System.out.println("Result = " + result);
         }
-        
+
         return result;
     }
 }
diff --git a/icu4j/main/shared/data/icudata.jar b/icu4j/main/shared/data/icudata.jar
index fe5766bccc7..31108dd39f3 100755
--- a/icu4j/main/shared/data/icudata.jar
+++ b/icu4j/main/shared/data/icudata.jar
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:848a445cb828689cd5bca20bfd321db5503ef66c0a94d929fc108a28d0c5595f
-size 11754757
+oid sha256:eb9182edec08706f02236909aaefcbf4c98d29d6415d1a8801633233c74f03fb
+size 11789631
diff --git a/icu4j/main/shared/data/icutzdata.jar b/icu4j/main/shared/data/icutzdata.jar
index 132d2c7fe68..b966701e305 100755
--- a/icu4j/main/shared/data/icutzdata.jar
+++ b/icu4j/main/shared/data/icutzdata.jar
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a75dfbe25f7671a65bb933aed49a71eb9a923767687625982603c54860478ce7
+oid sha256:cefefda6f12f61e7dcd7767a7b07b0fea3ca53c2a9b1524f3627e94cad6f3ee0
 size 90259
diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java
index 70f7edda17d..d217399fd20 100644
--- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java
+++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java
@@ -1,6 +1,6 @@
 /*
  *******************************************************************************
- * Copyright (C) 2003-2015 International Business Machines Corporation and
+ * Copyright (C) 2003-2016 International Business Machines Corporation and
  * others. All Rights Reserved.
  *******************************************************************************
  */
@@ -26,18 +26,18 @@ import com.ibm.icu.text.UnicodeSet;
  * Monkey tests for RBBI.  These tests have independent implementations of
  * the Unicode TR boundary rules, and compare results between these and ICU's
  * implementation, using random data.
- * 
+ *
  * Tests cover Grapheme Cluster (char), Word and Line breaks
- * 
+ *
  * Ported from ICU4C, original code in file source/test/intltest/rbbitst.cpp
  *
  */
 public class RBBITestMonkey extends TestFmwk {
-    
+
     public static void main(String[] args) {
         new RBBITestMonkey().run(args);
     }
-    
+
 //
 //     classs RBBIMonkeyKind
 //
@@ -49,7 +49,7 @@ public class RBBITestMonkey extends TestFmwk {
 //        testing, but works purely in terms of the interface defined here.
 //
     abstract static class RBBIMonkeyKind {
-    
+
         // Return a List of UnicodeSets, representing the character classes used
         //   for this type of iterator.
         abstract  List  charClasses();
@@ -60,14 +60,14 @@ public class RBBITestMonkey extends TestFmwk {
         // Find the next break position, starting from the specified position.
         // Return -1 after reaching end of string.
         abstract   int   next(int i);
-        
+
         // A Character Property, one of the constants defined in class UProperty.
         //   The value of this property will be displayed for the characters
-        //    near any test failure.  
+        //    near any test failure.
         int   fCharProperty;
     }
 
- 
+
     /**
      * Monkey test subclass for testing Character (Grapheme Cluster) boundaries.
      * Note: As of Unicode 6.1, fPrependSet is empty, so don't add it to fSets
@@ -88,6 +88,11 @@ public class RBBITestMonkey extends TestFmwk {
         UnicodeSet                fLVTSet;
         UnicodeSet                fHangulSet;
         UnicodeSet                fAnySet;
+        UnicodeSet                fEmojiModifierSet;
+        UnicodeSet                fEmojiBaseSet;
+        UnicodeSet                fZWJSet;
+        UnicodeSet                fGAZSet;
+
 
         StringBuffer              fText;
 
@@ -96,8 +101,8 @@ public class RBBITestMonkey extends TestFmwk {
         fText       = null;
         fCharProperty = UProperty.GRAPHEME_CLUSTER_BREAK;
         fCRLFSet    = new UnicodeSet("[\\r\\n]");
-        fControlSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Control}]");
-        fExtendSet  = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Extend}]");
+        fControlSet = new UnicodeSet("[[\\p{Grapheme_Cluster_Break = Control}-[:Block=Tags:]]]");
+        fExtendSet  = new UnicodeSet("[[\\p{Grapheme_Cluster_Break = Extend}][:Block=Tags:]]");
         fRegionalIndicatorSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Regional_Indicator}]");
         fPrependSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = Prepend}]");
         fSpacingSet = new UnicodeSet("[\\p{Grapheme_Cluster_Break = SpacingMark}]");
@@ -115,6 +120,17 @@ public class RBBITestMonkey extends TestFmwk {
 
         fAnySet     = new UnicodeSet("[\\u0000-\\U0010ffff]");
 
+        fEmojiBaseSet = new UnicodeSet(
+                "[\\u261D\\u26F9\\u270A-\\u270D\\U0001F385\\U0001F3C3-\\U0001F3C4\\U0001F3CA-\\U0001F3CB\\U0001F442-\\U0001F443"
+                        + "\\U0001F446-\\U0001F450\\U0001F466-\\U0001F469\\U0001F46E\\U0001F470-\\U0001F478\\U0001F47C\\U0001F481-\\U0001F483"
+                        + "\\U0001F485-\\U0001F487\\U0001F4AA\\U0001F575\\U0001F590\\U0001F595-\\U0001F596\\U0001F645-\\U0001F647"
+                        + "\\U0001F64B-\\U0001F64F\\U0001F6A3\\U0001F6B4-\\U0001F6B6\\U0001F6C0\\U0001F918]");
+
+        fEmojiModifierSet = new UnicodeSet(0x0001F3FB, 0x0001F3FF);
+        fZWJSet           = new UnicodeSet(0x200D, 0x200D);
+        fGAZSet           = new UnicodeSet("[\\U0001F466-\\U0001F469\\U0001F48B\\U0001F5E8\\u2764]");
+
+
         fSets       = new ArrayList();
         fSets.add(fCRLFSet);
         fSets.add(fControlSet);
@@ -126,44 +142,49 @@ public class RBBITestMonkey extends TestFmwk {
         fSets.add(fSpacingSet);
         fSets.add(fHangulSet);
         fSets.add(fAnySet);
+        fSets.add(fEmojiBaseSet);
+        fSets.add(fEmojiModifierSet);
+        fSets.add(fZWJSet);
+        fSets.add(fGAZSet);
      }
 
 
     void setText(StringBuffer s) {
         fText = s;
     }
-    
+
     List charClasses() {
         return fSets;
     }
-    
+
     int next(int prevPos) {
-        int    p1, p2, p3;    // Indices of the significant code points around the
-                              //   break position being tested.  The candidate break
-                              //   location is before p2.
-    
+        int    p0, p1, p2, p3;    // Indices of the significant code points around the
+                                  //   break position being tested.  The candidate break
+                                  //   location is before p2.
+
         int     breakPos = -1;
-    
-        int   c1, c2, c3;     // The code points at p0, p1, p2 & p3.
-        
+
+        int   c0, c1, c2, c3;     // The code points at p0, p1, p2 & p3.
+
         // Previous break at end of string.  return DONE.
         if (prevPos >= fText.length()) {
             return -1;
         }
-        p1 = p2 = p3 = prevPos;
+        p0 = p1 = p2 = p3 = prevPos;
         c3 =  UTF16.charAt(fText, prevPos);
-        c1 = c2 = 0;
-    
+        c0 = c1 = c2 = 0;
+
         // Loop runs once per "significant" character position in the input text.
         for (;;) {
             // Move all of the positions forward in the input string.
+            p0 = p1;  c0 = c1;
             p1 = p2;  c1 = c2;
             p2 = p3;  c2 = c3;
-    
+
             // Advance p3 by one codepoint
             p3 = moveIndex32(fText, p3, 1);
             c3 = (p3>=fText.length())? -1: UTF16.charAt(fText, p3);
-    
+
             if (p1 == p2) {
                 // Still warming up the loop.  (won't work with zero length strings, but we don't care)
                 continue;
@@ -172,7 +193,7 @@ public class RBBITestMonkey extends TestFmwk {
                 // Reached end of string.  Always a break position.
                 break;
             }
-    
+
             // Rule  GB3   CR x LF
             //     No Extend or Format characters may appear between the CR and LF,
             //     which requires the additional check for p2 immediately following p1.
@@ -180,14 +201,14 @@ public class RBBITestMonkey extends TestFmwk {
             if (c1==0x0D && c2==0x0A && p1==(p2-1)) {
                 continue;
             }
-    
+
             // Rule (GB4).   ( Control | CR | LF ) <break>
             if (fControlSet.contains(c1) ||
                 c1 == 0x0D ||
                 c1 == 0x0A)  {
                 break;
             }
-    
+
             // Rule (GB5)    <break>  ( Control | CR | LF )
             //
             if (fControlSet.contains(c2) ||
@@ -195,8 +216,8 @@ public class RBBITestMonkey extends TestFmwk {
                 c2 == 0x0A)  {
                 break;
             }
-    
-    
+
+
             // Rule (GB6)  L x ( L | V | LV | LVT )
             if (fLSet.contains(c1) &&
                 (fLSet.contains(c2)  ||
@@ -205,43 +226,61 @@ public class RBBITestMonkey extends TestFmwk {
                     fLVTSet.contains(c2))) {
                 continue;
             }
-    
+
             // Rule (GB7)    ( LV | V )  x  ( V | T )
             if ((fLVSet.contains(c1) || fVSet.contains(c1)) &&
                 (fVSet.contains(c2) || fTSet.contains(c2)))  {
                 continue;
             }
-    
+
             // Rule (GB8)    ( LVT | T)  x T
             if ((fLVTSet.contains(c1) || fTSet.contains(c1)) &&
                 fTSet.contains(c2))  {
                 continue;
             }
-    
+
             // Rule (GB8a)   Regional_Indicator x Regional_Indicator
+            //                Note: The first if condition is a little tricky. We only need to force
+            //                      a break if there are three or more contiguous RIs. If there are
+            //                      only two, a break following will occur via other rules, and will include
+            //                      any trailing extend characters, which is needed behavior.
+            if (fRegionalIndicatorSet.contains(c0) && fRegionalIndicatorSet.contains(c1)
+                    && fRegionalIndicatorSet.contains(c2)) {
+                break;
+            }
+
             if (fRegionalIndicatorSet.contains(c1) && fRegionalIndicatorSet.contains(c2)) {
                 continue;
             }
-            
-            // Rule (GB9)    Numeric x ALetter
-            if (fExtendSet.contains(c2))  {
+
+            // Rule (GB9)    x ( Extend | ZWJ )
+            if (fExtendSet.contains(c2) || fZWJSet.contains(c2))  {
                 continue;
             }
-            
+
             // Rule (GB9a)   x  SpacingMark
             if (fSpacingSet.contains(c2)) {
                 continue;
             }
-    
+
             // Rule (GB9b)   Prepend x
             if (fPrependSet.contains(c1)) {
                 continue;
             }
-    
+            // Rule (GB9c)   ( Emoji_Base | Glue_After_Zwj ) x Emoji_Modifier
+            if ((fEmojiBaseSet.contains(c1) || fGAZSet.contains(c1)) && fEmojiModifierSet.contains(c2)) {
+                continue;
+            }
+
+            // Rule (GB9d)   ZWJ x Glue_After_Zwj
+            if (fZWJSet.contains(c1) && fGAZSet.contains(c2)) {
+                continue;
+            }
+
             // Rule (GB10)  Any  <break>  Any
             break;
         }
-    
+
         breakPos = p2;
         return breakPos;
         }
@@ -249,11 +288,11 @@ public class RBBITestMonkey extends TestFmwk {
 
 
     /**
-     * 
+     *
      * Word Monkey Test Class
      *
-     * 
-     * 
+     *
+     *
      */
     static class RBBIWordMonkey extends RBBIMonkeyKind {
         List                      fSets;
@@ -275,10 +314,14 @@ public class RBBITestMonkey extends TestFmwk {
         UnicodeSet                fFormatSet;
         UnicodeSet                fExtendSet;
         UnicodeSet                fExtendNumLetSet;
-        UnicodeSet                fOtherSet;        
+        UnicodeSet                fOtherSet;
         UnicodeSet                fDictionaryCjkSet;
+        UnicodeSet                fEBaseSet;
+        UnicodeSet                fEModifierSet;
+        UnicodeSet                fZWSSet;
+        UnicodeSet                fGAZSet;
+
 
-        
         RBBIWordMonkey() {
             fCharProperty    = UProperty.WORD_BREAK;
 
@@ -286,13 +329,13 @@ public class RBBITestMonkey extends TestFmwk {
             fCRSet           = new UnicodeSet("[\\p{Word_Break = CR}]");
             fLFSet           = new UnicodeSet("[\\p{Word_Break = LF}]");
             fNewlineSet      = new UnicodeSet("[\\p{Word_Break = Newline}]");
-            fRegionalIndicatorSet = new UnicodeSet("[\\p{Word_Break = Regional_Indicator}]");            
+            fRegionalIndicatorSet = new UnicodeSet("[\\p{Word_Break = Regional_Indicator}]");
             fKatakanaSet     = new UnicodeSet("[\\p{Word_Break = Katakana}]");
-            fHebrew_LetterSet = new UnicodeSet("[\\p{Word_Break = Hebrew_Letter}]");            
+            fHebrew_LetterSet = new UnicodeSet("[\\p{Word_Break = Hebrew_Letter}]");
             fALetterSet      = new UnicodeSet("[\\p{Word_Break = ALetter}]");
             fALetterSet.removeAll(fDictionaryCjkSet);
             fSingle_QuoteSet = new UnicodeSet("[\\p{Word_Break = Single_Quote}]");
-            fDouble_QuoteSet = new UnicodeSet("[\\p{Word_Break = Double_Quote}]");           
+            fDouble_QuoteSet = new UnicodeSet("[\\p{Word_Break = Double_Quote}]");
             fMidNumLetSet    = new UnicodeSet("[\\p{Word_Break = MidNumLet}]");
             fMidLetterSet    = new UnicodeSet("[\\p{Word_Break = MidLetter}]");
             fMidNumSet       = new UnicodeSet("[\\p{Word_Break = MidNum}]");
@@ -300,6 +343,16 @@ public class RBBITestMonkey extends TestFmwk {
             fFormatSet       = new UnicodeSet("[\\p{Word_Break = Format}]");
             fExtendNumLetSet = new UnicodeSet("[\\p{Word_Break = ExtendNumLet}]");
             fExtendSet       = new UnicodeSet("[\\p{Word_Break = Extend}]");
+            fEBaseSet         = new UnicodeSet(
+                    "[\\u261D\\u26F9\\u270A-\\u270D\\U0001F385\\U0001F3C3-\\U0001F3C4\\U0001F3CA-\\U0001F3CB\\U0001F442-\\U0001F443"
+                    + "\\U0001F446-\\U0001F450\\U0001F466-\\U0001F469\\U0001F46E\\U0001F470-\\U0001F478\\U0001F47C\\U0001F481-\\U0001F483"
+                    + "\\U0001F485-\\U0001F487\\U0001F4AA\\U0001F575\\U0001F590\\U0001F595-\\U0001F596\\U0001F645-\\U0001F647"
+                    + "\\U0001F64B-\\U0001F64F\\U0001F6A3\\U0001F6B4-\\U0001F6B6\\U0001F6C0\\U0001F918]");
+
+            fEModifierSet    = new UnicodeSet("[\\U0001F3FB-\\U0001F3FF]");
+            fZWSSet          = new UnicodeSet(0x200D, 0x200D);
+            fGAZSet          = new UnicodeSet("[\\U0001F466-\\U0001F469\\U0001F48B\\U0001F5E8\\u2764]");
+            fExtendSet.removeAll(fZWSSet);
 
             fOtherSet        = new UnicodeSet();
             fOtherSet.complement();
@@ -318,6 +371,11 @@ public class RBBITestMonkey extends TestFmwk {
             fOtherSet.removeAll(fExtendSet);
             fOtherSet.removeAll(fExtendNumLetSet);
             fOtherSet.removeAll(fRegionalIndicatorSet);
+            fOtherSet.removeAll(fEBaseSet);
+            fOtherSet.removeAll(fEModifierSet);
+            fOtherSet.removeAll(fZWSSet);
+            fOtherSet.removeAll(fGAZSet);
+
             // Inhibit dictionary characters from being tested at all.
             // remove surrogates so as to not generate higher CJK characters
             fOtherSet.removeAll(new UnicodeSet("[[\\p{LineBreak = Complex_Context}][:Line_Break=Surrogate:]]"));
@@ -342,24 +400,24 @@ public class RBBITestMonkey extends TestFmwk {
             fSets.add(fExtendNumLetSet);
             fSets.add(fOtherSet);
         }
-        
-        
+
+
         List  charClasses() {
-         return fSets;  
+         return fSets;
+        }
+
+        void   setText(StringBuffer s) {
+            fText = s;
         }
-        
-        void   setText(StringBuffer s) { 
-            fText = s;        
-        }   
 
-        int   next(int prevPos) {  
-            int    /*p0,*/ p1, p2, p3;      // Indices of the significant code points around the 
+        int   next(int prevPos) {
+            int    /*p0,*/ p1, p2, p3;      // Indices of the significant code points around the
                                         //   break position being tested.  The candidate break
                                         //   location is before p2.
             int     breakPos = -1;
-            
+
             int c0, c1, c2, c3;   // The code points at p0, p1, p2 & p3.
-            
+
             // Previous break at end of string.  return DONE.
             if (prevPos >= fText.length()) {
                 return -1;
@@ -367,8 +425,8 @@ public class RBBITestMonkey extends TestFmwk {
             /*p0 =*/ p1 = p2 = p3 = prevPos;
             c3 = UTF16.charAt(fText, prevPos);
             c0 = c1 = c2 = 0;
-            
-            
+
+
 
             // Loop runs once per "significant" character position in the input text.
             for (;;) {
@@ -376,7 +434,7 @@ public class RBBITestMonkey extends TestFmwk {
                 /*p0 = p1;*/  c0 = c1;
                 p1 = p2;  c1 = c2;
                 p2 = p3;  c2 = c3;
-                
+
                 // Advance p3 by    X(Extend | Format)*   Rule 4
                 //    But do not advance over Extend & Format following a new line. (Unicode 5.1 change)
                 do {
@@ -390,7 +448,7 @@ public class RBBITestMonkey extends TestFmwk {
                         break;
                     }
                 }
-                while (setContains(fFormatSet, c3) || setContains(fExtendSet, c3));
+                while (setContains(fFormatSet, c3) || setContains(fExtendSet, c3) || setContains(fZWSSet, c3));
 
                 if (p1 == p2) {
                     // Still warming up the loop.  (won't work with zero length strings, but we don't care)
@@ -408,7 +466,7 @@ public class RBBITestMonkey extends TestFmwk {
                 if (c1==0x0D && c2==0x0A) {
                     continue;
                 }
-                
+
                 // Rule (3a)  Break before and after newlines (including CR and LF)
                 //
                 if (fCRSet.contains(c1) || fLFSet.contains(c1) || fNewlineSet.contains(c1)) {
@@ -418,12 +476,19 @@ public class RBBITestMonkey extends TestFmwk {
                     break;
                 }
 
+                // Rule (3c)    ZWJ x GAZ (Glue after ZWJ).
+                //              Not ignoring extend chars, so peek into input text to
+                //              get the potential ZWJ, the character immediately preceding c2.
+               if (fZWSSet.contains(fText.codePointBefore(p2)) && fGAZSet.contains(c2)) {
+                    continue;
+                }
+
                 // Rule (5).   (ALetter | Hebrew_Letter) x (ALetter | Hebrew_Letter)
                 if ((fALetterSet.contains(c1) || fHebrew_LetterSet.contains(c1)) &&
                     (fALetterSet.contains(c2) || fHebrew_LetterSet.contains(c2)))  {
                     continue;
                 }
-               
+
                 // Rule (6)  (ALetter | Hebrew_Letter)  x  (MidLetter | MidNumLet | Single_Quote) (ALetter | Hebrew_Letter)
                 //
                 if ( (fALetterSet.contains(c1) || fHebrew_LetterSet.contains(c1))   &&
@@ -453,13 +518,13 @@ public class RBBITestMonkey extends TestFmwk {
                 if (fHebrew_LetterSet.contains(c0) && fDouble_QuoteSet.contains(c1) && fHebrew_LetterSet.contains(c2)) {
                     continue;
                 }
-                
+
                 //  Rule (8)    Numeric x Numeric
                 if (fNumericSet.contains(c1) &&
                         fNumericSet.contains(c2))  {
                     continue;
                 }
-                
+
                 // Rule (9)    (ALetter | Hebrew_Letter) x Numeric
                 if ((fALetterSet.contains(c1) || fHebrew_LetterSet.contains(c1)) &&
                     fNumericSet.contains(c2))  {
@@ -478,14 +543,14 @@ public class RBBITestMonkey extends TestFmwk {
                         fNumericSet.contains(c2)) {
                     continue;
                 }
-                
+
                 // Rule (12)  Numeric x (MidNum | MidNumLet | SingleQuote) Numeric
                 if (fNumericSet.contains(c1) &&
                     (fMidNumSet.contains(c2) || fMidNumLetSet.contains(c2) || fSingle_QuoteSet.contains(c2))  &&
                     setContains(fNumericSet, c3)) {
                     continue;
                 }
-                
+
                 // Rule (13)  Katakana x Katakana
                 if (fKatakanaSet.contains(c1) &&
                         fKatakanaSet.contains(c2))  {
@@ -498,7 +563,7 @@ public class RBBITestMonkey extends TestFmwk {
                         fExtendNumLetSet.contains(c2)) {
                     continue;
                 }
-                
+
                 // Rule 13b   ExtendNumLet x (ALetter | Hebrew_Letter | Numeric | Katakana)
                 if (fExtendNumLetSet.contains(c1) &&
                         (fALetterSet.contains(c2) || fHebrew_LetterSet.contains(c2) ||
@@ -506,32 +571,40 @@ public class RBBITestMonkey extends TestFmwk {
                     continue;
                 }
 
-                
-                // Rule 13c   Do not break between Regional Indicators. 
+
+                // Rule 13c   Do not break between Regional Indicators.
                 //            Regional_Indicator  ×  Regional_Indicator
+                if (fRegionalIndicatorSet.contains(c0) && fRegionalIndicatorSet.contains(c1)) {
+                    break;
+                }
                 if (fRegionalIndicatorSet.contains(c1) && fRegionalIndicatorSet.contains(c2)) {
                     continue;
                 }
-                
+
+                // Rule 13d   (E_Base | Glue_After_Zwj) x E_Modifier
+                if ((fEBaseSet.contains(c1)  || fGAZSet.contains(c1)) && fEModifierSet.contains(c2)) {
+                    continue;
+                }
+
                 // Rule 14.  Break found here.
                 break;
             }
-            
+
             breakPos = p2;
             return breakPos;
         }
-        
+
     }
 
- 
+
     static class RBBILineMonkey extends RBBIMonkeyKind {
-        
+
         List        fSets;
-        
+
         // UnicodeSets for each of the Line Breaking character classes.
         // Order matches that of Unicode UAX 14, Table 1, which makes it a little easier
         // to verify that they are all accounted for.
-        
+
         UnicodeSet  fBK;
         UnicodeSet  fCR;
         UnicodeSet  fLF;
@@ -570,19 +643,21 @@ public class RBBITestMonkey extends TestFmwk {
         UnicodeSet  fJV;
         UnicodeSet  fJT;
         UnicodeSet  fRI;
-        UnicodeSet  fSA;
         UnicodeSet  fXX;
-        
+        UnicodeSet  fEB;
+        UnicodeSet  fEM;
+        UnicodeSet  fZJ;
+
         StringBuffer  fText;
         int           fOrigPositions;
-        
-        
-        
+
+
+
         RBBILineMonkey()
         {
             fCharProperty  = UProperty.LINE_BREAK;
             fSets          = new ArrayList();
-            
+
             fBK    = new UnicodeSet("[\\p{Line_Break=BK}]");
             fCR    = new UnicodeSet("[\\p{Line_break=CR}]");
             fLF    = new UnicodeSet("[\\p{Line_break=LF}]");
@@ -621,23 +696,33 @@ public class RBBITestMonkey extends TestFmwk {
             fJV    = new UnicodeSet("[\\p{Line_break=JV}]");
             fJT    = new UnicodeSet("[\\p{Line_break=JT}]");
             fRI    = new UnicodeSet("[\\p{Line_break=RI}]");
-            fSA    = new UnicodeSet("[\\p{Line_break=SA}]");
             fXX    = new UnicodeSet("[\\p{Line_break=XX}]");
+            fEB    = new UnicodeSet(
+                    "[\\u261D\\u26F9\\u270A-\\u270D\\U0001F385\\U0001F3C3-\\U0001F3C4\\U0001F3CA-\\U0001F3CB\\U0001F442-\\U0001F443"
+                    + "\\U0001F446-\\U0001F450\\U0001F466-\\U0001F469\\U0001F46E\\U0001F470-\\U0001F478\\U0001F47C\\U0001F481-\\U0001F483"
+                    + "\\U0001F485-\\U0001F487\\U0001F4AA\\U0001F575\\U0001F590\\U0001F595-\\U0001F596\\U0001F645-\\U0001F647"
+                    + "\\U0001F64B-\\U0001F64F\\U0001F6A3\\U0001F6B4-\\U0001F6B6\\U0001F6C0\\U0001F918]");
+            fEM    = new UnicodeSet("[\\U0001F3FB-\\U0001F3FF]");
+            fZJ    = new UnicodeSet(0x200D, 0x200D);
 
             // Remove dictionary characters.
             // The monkey test reference implementation of line break does not replicate the dictionary behavior,
             // so dictionary characters are omitted from the monkey test data.
             UnicodeSet dictionarySet = new UnicodeSet(
                     "[[:LineBreak = Complex_Context:] & [[:Script = Thai:][:Script = Lao:][:Script = Khmer:] [:script = Myanmar:]]]");
-            fSA.removeAll(dictionarySet);
 
             fAL.addAll(fXX);     // Default behavior for XX is identical to AL
             fAL.addAll(fAI);     // Default behavior for AI is identical to AL
-            fAL.addAll(fSA);     // Default behavior for SA is XX, which defaults to AL
             fAL.addAll(fSG);     // Default behavior for SG (unpaired surrogates) is AL
-            
+
             fNS.addAll(fCJ);     // Default behavior for CJ is identical to NS.
-                        
+
+            fID.addAll(fEB);     // Emoji Base and Emoji Modifier behave as ID.
+            fID.addAll(fEM);
+            fAL.removeAll(fEM);
+            fAL.remove(0x2764);   // Emoji Proposal: move u2764 from AL to ID
+            fID.add(0x2764);
+
             fSets.add(fBK);
             fSets.add(fCR);
             fSets.add(fLF);
@@ -674,47 +759,50 @@ public class RBBITestMonkey extends TestFmwk {
             fSets.add(fHL);
             fSets.add(fID);
             fSets.add(fWJ);
-            fSets.add(fSA);
-            fSets.add(fSG);
             fSets.add(fRI);
+            fSets.add(fSG);
+            fSets.add(fEB);
+            fSets.add(fEM);
+            fSets.add(fZJ);
+
         }
-        
+
         void setText(StringBuffer s) {
             fText       = s;
         }
-        
-        
-        
+
+
+
 
         int next(int startPos) {
             int    pos;       //  Index of the char following a potential break position
             int    thisChar;  //  Character at above position "pos"
-            
+
             int    prevPos;   //  Index of the char preceding a potential break position
             int    prevChar;  //  Character at above position.  Note that prevChar
                               //   and thisChar may not be adjacent because combining
                               //   characters between them will be ignored.
             int    prevCharX2; //  Character before prevChar, more contex for LB 21a
-            
+
             int    nextPos;   //  Index of the next character following pos.
                               //     Usually skips over combining marks.
             int    tPos;      //  temp value.
             int    matchVals[]  = null;       // Number  Expression Match Results
- 
-            
+
+
             if (startPos >= fText.length()) {
                 return -1;
             }
-            
-            
+
+
             // Initial values for loop.  Loop will run the first time without finding breaks,
             //                           while the invalid values shift out and the "this" and
             //                           "prev" positions are filled in with good values.
             pos      = prevPos   = -1;    // Invalid value, serves as flag for initial loop iteration.
             thisChar = prevChar  = prevCharX2 = 0;
             nextPos  = startPos;
-            
-            
+
+
             // Loop runs once per position in the test text, until a break position
             //  is found.  In each iteration, we are testing for a possible break
             //  just preceding the character at index "pos".  The character preceding
@@ -727,28 +815,28 @@ public class RBBITestMonkey extends TestFmwk {
                 prevChar  = thisChar;
                 pos       = nextPos;
                 nextPos   = moveIndex32(fText, pos, 1);
-                
+
                 // Rule LB2 - Break at end of text.
                 if (pos >= fText.length()) {
                     break;
                 }
-                
+
                 // Rule LB 9 - adjust for combining sequences.
                 //             We do this rule out-of-order because the adjustment does
                 //             not effect the way that rules LB 3 through LB 6 match,
                 //             and doing it here rather than after LB 6 is substantially
                 //             simpler when combining sequences do occur.
-                
-                
+
+
                 // LB 9         Keep combining sequences together.
-                //              advance over any CM class chars at "pos", 
+                //              advance over any CM class chars at "pos",
                 //              result is "nextPos" for the following loop iteration.
                 thisChar  = UTF16.charAt(fText, pos);
                 if (!(fSP.contains(thisChar) || fBK.contains(thisChar) || thisChar==0x0d ||
                         thisChar==0x0a || fNL.contains(thisChar) || fZW.contains(thisChar) )) {
                     for (;;) {
                         if (nextPos == fText.length()) {
-                            break;   
+                            break;
                         }
                         int nextChar = UTF16.charAt(fText, nextPos);
                         if (!fCM.contains(nextChar)) {
@@ -757,28 +845,28 @@ public class RBBITestMonkey extends TestFmwk {
                         nextPos = moveIndex32(fText, nextPos, 1);
                     }
                 }
-                
+
                 // LB 9 Treat X CM* as if it were X
                 //        No explicit action required.
-                
+
                 // LB 10     Treat any remaining combining mark as AL
                 if (fCM.contains(thisChar)) {
-                    thisChar = 'A';   
+                    thisChar = 'A';
                 }
 
-                
+
                 // If the loop is still warming up - if we haven't shifted the initial
                 //   -1 positions out of prevPos yet - loop back to advance the
                 //    position in the input without any further looking for breaks.
                 if (prevPos == -1) {
                     continue;
                 }
-                
+
                 // LB 4  Always break after hard line breaks,
                 if (fBK.contains(prevChar)) {
                     break;
                 }
-                
+
                 // LB 5  Break after CR, LF, NL, but not inside CR LF
                 if (fCR.contains(prevChar) && fLF.contains(thisChar)) {
                     continue;
@@ -788,46 +876,57 @@ public class RBBITestMonkey extends TestFmwk {
                      fNL.contains(prevChar))  {
                     break;
                 }
-                
+
                 // LB 6  Don't break before hard line breaks
                 if (fBK.contains(thisChar) || fCR.contains(thisChar) ||
                         fLF.contains(thisChar) || fNL.contains(thisChar) ) {
                     continue;
                 }
-                
-                
+
+
                 // LB 7  Don't break before spaces or zero-width space.
                 if (fSP.contains(thisChar)) {
                     continue;
                 }
-                
+
                 if (fZW.contains(thisChar)) {
                     continue;
                 }
-                
+
                 // LB 8  Break after zero width space
                 if (fZW.contains(prevChar)) {
                     break;
                 }
-                
+
+                // LB 8a ZJ x ID
+                //       The monkey test's way of ignoring combining characters doesn't work
+                //       for this rule. ZJ is also a CM. Need to get the actual character
+                //       preceding "thisChar", not ignoring combining marks, possibly ZJ.
+                {
+                    int prevC = fText.codePointBefore(pos);
+                    if (fZJ.contains(prevC) && fID.contains(thisChar)) {
+                        continue;
+                    }
+                }
+
                 //  LB 9, 10  Already done, at top of loop.
                 //
-                
-                
+
+
                 // LB 11
                 //    x  WJ
                 //    WJ  x
                 if (fWJ.contains(thisChar) || fWJ.contains(prevChar)) {
                     continue;
                 }
-                
-                
+
+
                 // LB 12
                 //        GL x
                 if (fGL.contains(prevChar)) {
                     continue;
                 }
-                
+
                 // LB 12a
                 //    [^SP BA HY] x GL
                 if (!(fSP.contains(prevChar) ||
@@ -836,8 +935,8 @@ public class RBBITestMonkey extends TestFmwk {
                     continue;
                 }
 
-                
-                
+
+
                 // LB 13  Don't break before closings.
                 //       NU x CL, NU x CP  and NU x IS are not matched here so that they will
                 //       fall into LB 17 and the more general number regular expression.
@@ -849,7 +948,7 @@ public class RBBITestMonkey extends TestFmwk {
                     !fNU.contains(prevChar) && fSY.contains(thisChar))    {
                     continue;
                 }
-                
+
                 // LB 14  Don't break after OP SP*
                 //       Scan backwards, checking for this sequence.
                 //       The OP char could include combining marks, so we actually check for
@@ -866,8 +965,8 @@ public class RBBITestMonkey extends TestFmwk {
                 if (fOP.contains(UTF16.charAt(fText, tPos))) {
                     continue;
                 }
-                
-                // LB 15 Do not break within "[ 
+
+                // LB 15 Do not break within "[
                 //       QU CM* SP* x OP
                 if (fOP.contains(thisChar)) {
                     // Scan backwards from prevChar to see if it is preceded by QU CM* SP*
@@ -881,8 +980,8 @@ public class RBBITestMonkey extends TestFmwk {
                     if (fQU.contains(UTF16.charAt(fText, tPos))) {
                         continue;
                     }
-                }               
-                
+                }
+
                 // LB 16   (CL | CP) SP* x NS
                 if (fNS.contains(thisChar)) {
                     tPos = prevPos;
@@ -895,9 +994,9 @@ public class RBBITestMonkey extends TestFmwk {
                     if (fCL.contains(UTF16.charAt(fText, tPos)) || fCP.contains(UTF16.charAt(fText, tPos))) {
                         continue;
                     }
-                }               
-                
-                               
+                }
+
+
                 // LB 17        B2 SP* x B2
                 if (fB2.contains(thisChar)) {
                     tPos = prevPos;
@@ -910,25 +1009,25 @@ public class RBBITestMonkey extends TestFmwk {
                     if (fB2.contains(UTF16.charAt(fText, tPos))) {
                         continue;
                     }
-                }               
-                
+                }
+
                 // LB 18    break after space
                 if (fSP.contains(prevChar)) {
                     break;
                 }
-                
+
                 // LB 19
                 //    x   QU
                 //    QU  x
                 if (fQU.contains(thisChar) || fQU.contains(prevChar)) {
                     continue;
                 }
-                
+
                 // LB 20  Break around a CB
                 if (fCB.contains(thisChar) || fCB.contains(prevChar)) {
                     break;
                 }
-                
+
                 // LB 21
                 if (fBA.contains(thisChar) ||
                         fHY.contains(thisChar) ||
@@ -936,7 +1035,7 @@ public class RBBITestMonkey extends TestFmwk {
                         fBB.contains(prevChar) )   {
                     continue;
                 }
-                
+
                  // LB 21a, HL (HY | BA) x
                 if (fHL.contains(prevCharX2) && (fHY.contains(prevChar) || fBA.contains(prevChar))) {
                     continue;
@@ -946,7 +1045,7 @@ public class RBBITestMonkey extends TestFmwk {
                 if (fSY.contains(prevChar) && fHL.contains(thisChar)) {
                     continue;
                 }
-                
+
                // LB 22
                 if (fAL.contains(prevChar) && fIN.contains(thisChar) ||
                         fEX.contains(prevChar) && fIN.contains(thisChar) ||
@@ -956,8 +1055,8 @@ public class RBBITestMonkey extends TestFmwk {
                         fNU.contains(prevChar) && fIN.contains(thisChar) )   {
                     continue;
                 }
-                
-                
+
+
                 // LB 23    ID x PO    (Note:  Leading CM behaves like ID)
                 //          AL x NU
                 //          NU x AL
@@ -968,7 +1067,7 @@ public class RBBITestMonkey extends TestFmwk {
                         fNU.contains(prevChar) && fHL.contains(thisChar) )   {
                    continue;
                 }
-                
+
                 // LB 24  Do not break between prefix and letters or ideographs.
                 //        PR x ID
                 //        PR x AL
@@ -978,8 +1077,8 @@ public class RBBITestMonkey extends TestFmwk {
                     fPO.contains(prevChar) && (fAL.contains(thisChar) || fHL.contains(thisChar)))  {
                     continue;
                 }
-                
-                
+
+
                 // LB 25    Numbers
                 matchVals = LBNumberCheck(fText, prevPos, matchVals);
                 if (matchVals[0] != -1) {
@@ -995,7 +1094,7 @@ public class RBBITestMonkey extends TestFmwk {
                             nextPos = numEndIdx;
                             pos     = numEndIdx;
                             do {
-                                pos = moveIndex32(fText, pos, -1);  
+                                pos = moveIndex32(fText, pos, -1);
                                 thisChar = UTF16.charAt(fText, pos);
                             }
                             while (fCM.contains(thisChar));
@@ -1003,8 +1102,8 @@ public class RBBITestMonkey extends TestFmwk {
                         continue;
                     }
                 }
-                
-                
+
+
                 // LB 26  Do not break Korean Syllables
                 if (fJL.contains(prevChar) && (fJL.contains(thisChar) ||
                                                 fJV.contains(thisChar) ||
@@ -1039,18 +1138,18 @@ public class RBBITestMonkey extends TestFmwk {
                         continue;
                     }
 
-                
-                
+
+
                 // LB 28 Do not break between alphabetics
                 if ((fAL.contains(prevChar) || fHL.contains(prevChar)) && (fAL.contains(thisChar) || fHL.contains(thisChar))) {
                     continue;
                 }
-                
+
                 // LB 29  Do not break between numeric punctuation and alphabetics
                 if (fIS.contains(prevChar) && (fAL.contains(thisChar) || fHL.contains(thisChar))) {
                     continue;
                 }
-                
+
                 // LB 30    Do not break between letters, numbers, or ordinary symbols and opening or closing punctuation.
                 //          (AL | NU) x OP
                 //          CP x (AL | NU)
@@ -1061,20 +1160,29 @@ public class RBBITestMonkey extends TestFmwk {
                     continue;
                 }
 
-                // LB 30a   Do not break between regional indicators.  RI × RI
+                // LB 30a   Break between pairs of Regional Indicators.
+                //             RI RI <break> RI
+                //             RI    x    RI
+                if (fRI.contains(prevCharX2) && fRI.contains(prevChar) && fRI.contains(thisChar)) {
+                    break;
+                }
                 if (fRI.contains(prevChar) && fRI.contains(thisChar)) {
                     continue;
                 }
-                
+
+                // LB 30b   Emoji Base x Emoji Modifier
+                if (fEB.contains(prevChar) && fEM.contains(thisChar)) {
+                    continue;
+                }
                 // LB 31    Break everywhere else
-                break;            
+                break;
             }
-            
+
             return pos;
         }
-        
-        
-        
+
+
+
         // Match the following regular expression in the input text.
         //    ((PR | PO) CM*)? ((OP | HY) CM*)? NU CM* ((NU | IS | SY) CM*) * ((CL | CP) CM*)?  (PR | PO) CM*)?
         //      0    0   1       3    3    4              7    7    7    7      9    9    9     11   11    (match states)
@@ -1090,15 +1198,15 @@ public class RBBITestMonkey extends TestFmwk {
             retVals[0]     = -1;  // Indicates no match.
             int matchState = 0;
             int idx        = startIdx;
-            
+
             matchLoop: for (idx = startIdx; idx<s.length(); idx = moveIndex32(s, idx, 1)){
                 int c = UTF16.charAt(s, idx);
                 int cLBType = UCharacter.getIntPropertyValue(c, UProperty.LINE_BREAK);
                 switch (matchState) {
-                    case 0:   
+                    case 0:
                         if (cLBType == UCharacter.LineBreak.PREFIX_NUMERIC ||
                             cLBType == UCharacter.LineBreak.POSTFIX_NUMERIC) {
-                            matchState = 1;  
+                            matchState = 1;
                             break;
                         }
                         if (cLBType == UCharacter.LineBreak.OPEN_PUNCTUATION) {
@@ -1114,7 +1222,7 @@ public class RBBITestMonkey extends TestFmwk {
                             break;
                         }
                         break matchLoop;   /* No Match  */
-                        
+
                     case 1:
                         if (cLBType == UCharacter.LineBreak.COMBINING_MARK) {
                             matchState = 1;
@@ -1133,8 +1241,8 @@ public class RBBITestMonkey extends TestFmwk {
                             break;
                         }
                         break matchLoop;   /* No Match  */
-                        
-                        
+
+
                     case 4:
                         if (cLBType == UCharacter.LineBreak.COMBINING_MARK) {
                             matchState = 4;
@@ -1147,87 +1255,87 @@ public class RBBITestMonkey extends TestFmwk {
                         break matchLoop;   /* No Match  */
-                        //    ((PR | PO) CM*)? ((OP | HY) CM*)? NU CM* ((NU | IS | SY) CM*) * (CL CM*)?  (PR | PO) CM*)?
+                        //    ((PR | PO) CM*)? ((OP | HY) CM*)? NU CM* ((NU | IS | SY) CM*) * (CL CM*)? ((PR | PO) CM*)?
                         //      0    0   1       3    3    4              7    7    7    7      9   9     11   11    (match states)
-                 
+
                     case 7:
                         if (cLBType == UCharacter.LineBreak.COMBINING_MARK) {
                             matchState = 7;
-                            break;                           
+                            break;
                         }
                         if (cLBType == UCharacter.LineBreak.NUMERIC) {
                             matchState = 7;
-                            break;                           
+                            break;
                         }
                         if (cLBType == UCharacter.LineBreak.INFIX_NUMERIC) {
                             matchState = 7;
-                            break;                           
+                            break;
                         }
                         if (cLBType == UCharacter.LineBreak.BREAK_SYMBOLS) {
                             matchState = 7;
-                            break;       
+                            break;
                         }
                         if (cLBType == UCharacter.LineBreak.CLOSE_PUNCTUATION) {
                             matchState = 9;
-                            break;                           
+                            break;
                         }
                         if (cLBType == UCharacter.LineBreak.CLOSE_PARENTHESIS) {
                             matchState = 9;
-                            break;                           
+                            break;
                         }
                         if (cLBType == UCharacter.LineBreak.POSTFIX_NUMERIC) {
                             matchState = 11;
-                            break;                           
+                            break;
                         }
                         if (cLBType == UCharacter.LineBreak.PREFIX_NUMERIC) {
                             matchState = 11;
-                            break;                           
+                            break;
                         }
 
                         break matchLoop;    // Match Complete.
                     case 9:
                         if (cLBType == UCharacter.LineBreak.COMBINING_MARK) {
                             matchState = 9;
-                            break;                           
+                            break;
                         }
                         if (cLBType == UCharacter.LineBreak.POSTFIX_NUMERIC) {
                             matchState = 11;
-                            break;                           
+                            break;
                         }
                         if (cLBType == UCharacter.LineBreak.PREFIX_NUMERIC) {
                             matchState = 11;
-                            break;                           
+                            break;
                         }
                         break matchLoop;    // Match Complete.
                     case 11:
                         if (cLBType == UCharacter.LineBreak.COMBINING_MARK) {
                             matchState = 11;
-                            break;                           
+                            break;
                         }
                         break matchLoop;    // Match Complete.
                 }
             }
             if (matchState > 4) {
-                retVals[0] = startIdx;   
-                 retVals[1] = idx;   
+                retVals[0] = startIdx;
+                 retVals[1] = idx;
             }
             return retVals;
         }
-        
-        
+
+
         List  charClasses() {
             return fSets;
         }
-        
-        
-    
+
+
+
     }
 
-     
+
     /**
-     * 
+     *
      * Sentence Monkey Test Class
      *
-     * 
-     * 
+     *
+     *
      */
     static class RBBISentenceMonkey extends RBBIMonkeyKind {
         List                 fSets;
@@ -1247,8 +1355,8 @@ public class RBBITestMonkey extends TestFmwk {
         UnicodeSet           fOtherSet;
         UnicodeSet           fExtendSet;
 
- 
-        
+
+
         RBBISentenceMonkey() {
             fCharProperty  = UProperty.SENTENCE_BREAK;
 
@@ -1301,26 +1409,26 @@ public class RBBITestMonkey extends TestFmwk {
             fSets.add(fOtherSet);
             fSets.add(fExtendSet);
         }
-        
-        
+
+
         List  charClasses() {
-            return fSets;  
+            return fSets;
+        }
+
+        void   setText(StringBuffer s) {
+            fText = s;
         }
-        
-        void   setText(StringBuffer s) { 
-            fText = s;        
-        }   
 
-        
+
         //      moveBack()   Find the "significant" code point preceding the index i.
         //      Skips over ($Extend | $Format)*
-        // 
+        //
         private int moveBack(int i) {
-            
+
             if (i <= 0) {
                 return -1;
             }
-            
+
             int      c;
             int      j = i;
             do {
@@ -1330,8 +1438,8 @@ public class RBBITestMonkey extends TestFmwk {
             while (j>0 &&(fFormatSet.contains(c) || fExtendSet.contains(c)));
             return j;
         }
-        
-        
+
+
         int moveForward(int i) {
             if (i>=fText.length()) {
                 return fText.length();
@@ -1344,9 +1452,9 @@ public class RBBITestMonkey extends TestFmwk {
             }
             while (c>=0 && (fFormatSet.contains(c) || fExtendSet.contains(c)));
             return j;
-           
+
         }
-        
+
         int cAt(int pos) {
             if (pos<0 || pos>=fText.length()) {
                 return -1;
@@ -1354,15 +1462,15 @@ public class RBBITestMonkey extends TestFmwk {
             return UTF16.charAt(fText, pos);
         }
 
-        int   next(int prevPos) {  
-            int    /*p0,*/ p1, p2, p3;      // Indices of the significant code points around the 
+        int   next(int prevPos) {
+            int    /*p0,*/ p1, p2, p3;      // Indices of the significant code points around the
                                         //   break position being tested.  The candidate break
                                         //   location is before p2.
             int     breakPos = -1;
-            
+
             int c0, c1, c2, c3;         // The code points at p0, p1, p2 & p3.
             int c;
-            
+
             // Prev break at end of string.  return DONE.
             if (prevPos >= fText.length()) {
                 return -1;
@@ -1370,28 +1478,28 @@ public class RBBITestMonkey extends TestFmwk {
             /*p0 =*/ p1 = p2 = p3 = prevPos;
             c3 = UTF16.charAt(fText, prevPos);
             c0 = c1 = c2 = 0;
-            
+
             // Loop runs once per "significant" character position in the input text.
             for (;;) {
                 // Move all of the positions forward in the input string.
                 /*p0 = p1;*/  c0 = c1;
                 p1 = p2;  c1 = c2;
                 p2 = p3;  c2 = c3;
-                
+
-                // Advancd p3 by  X(Extend | Format)*   Rule 4
+                // Advance p3 by  X(Extend | Format)*   Rule 4
                 p3 = moveForward(p3);
                 c3 = cAt(p3);
-                
+
                 // Rule (3) CR x LF
                 if (c1==0x0d && c2==0x0a && p2==(p1+1)) {
                     continue;
                 }
-                
+
                 // Rule (4)    Sep  <break>
                 if (fSepSet.contains(c1)) {
                     p2 = p1+1;   // Separators don't combine with Extend or Format
                     break;
-                }               
+                }
 
                 if (p2 >= fText.length()) {
                     // Reached end of string.  Always a break position.
@@ -1415,7 +1523,7 @@ public class RBBITestMonkey extends TestFmwk {
                 }
 
-                // Rule (8)  ATerm Close* Sp*  x  (not (OLettter | Upper | Lower | Sep))* Lower
+                // Rule (8)  ATerm Close* Sp*  x  (not (OLetter | Upper | Lower | Sep))* Lower
-                //           Note:  Sterm | ATerm are added to the negated part of the expression by a 
+                //           Note:  Sterm | ATerm are added to the negated part of the expression by a
                 //                  note to the Unicode 5.0 documents.
                 int p8 = p1;
                 while (p8>0 && fSpSet.contains(cAt(p8))) {
@@ -1430,7 +1538,7 @@ public class RBBITestMonkey extends TestFmwk {
                         c = cAt(p8);
                         if (c==-1 || fOLetterSet.contains(c) || fUpperSet.contains(c) ||
                             fLowerSet.contains(c) || fSepSet.contains(c) ||
-                            fATermSet.contains(c) || fSTermSet.contains(c))  
+                            fATermSet.contains(c) || fSTermSet.contains(c))
                          {
                             break;
                         }
@@ -1440,7 +1548,7 @@ public class RBBITestMonkey extends TestFmwk {
                         continue;
                     }
                 }
-                
+
                 // Rule 8a  (STerm | ATerm) Close* Sp* x (SContinue | Sterm | ATerm)
                 if (fSContinueSet.contains(c2) || fSTermSet.contains(c2) || fATermSet.contains(c2)) {
                     p8 = p1;
@@ -1504,12 +1612,12 @@ public class RBBITestMonkey extends TestFmwk {
             breakPos = p2;
             return breakPos;
         }
-           
 
-        
+
+
     }
 
- 
+
     /**
      * Move an index into a string by n code points.
      *   Similar to UTF16.moveCodePointOffset, but without the exceptions, which were
@@ -1526,35 +1634,35 @@ public class RBBITestMonkey extends TestFmwk {
         if (amt>0) {
             for (i=0; i<amt; i++) {
                 if (pos >= s.length()) {
-                    return s.length();                   
+                    return s.length();
                 }
                 c = s.charAt(pos);
                 pos++;
                 if (UTF16.isLeadSurrogate(c) && pos < s.length()) {
                     c = s.charAt(pos);
                     if (UTF16.isTrailSurrogate(c)) {
-                        pos++;   
+                        pos++;
                     }
                 }
             }
         } else {
             for (i=0; i>amt; i--) {
                 if (pos <= 0) {
-                    return 0;   
+                    return 0;
                 }
                 pos--;
                 c = s.charAt(pos);
                 if (UTF16.isTrailSurrogate(c) && pos >= 0) {
                     c = s.charAt(pos);
                     if (UTF16.isLeadSurrogate(c)) {
-                        pos--;   
+                        pos--;
                     }
                 }
             }
         }
         return pos;
     }
-    
+
     /**
      * No-exceptions form of UnicodeSet.contains(c).
      *    Simplifies loops that terminate with an end-of-input character value.
@@ -1568,8 +1676,8 @@ public class RBBITestMonkey extends TestFmwk {
         }
         return s.contains(c);
     }
-    
-    
+
+
     /**
      * return the index of the next code point in the input text.
      * @param i the preceding index
@@ -1589,8 +1697,8 @@ public class RBBITestMonkey extends TestFmwk {
         }
         return retVal;
     }
-    
-    
+
+
     /**
      * random number generator.  Not using Java's built-in Randoms for two reasons:
      *    1.  Using this code allows obtaining the same sequences as those from the ICU4C monkey test.
@@ -1641,7 +1749,7 @@ public class RBBITestMonkey extends TestFmwk {
 
             }
        }
-    
+
 /**
  *  Run a RBBI monkey test.  Common routine, for all break iterator types.
  *    Parameters:
@@ -1688,20 +1796,20 @@ void RunMonkey(BreakIterator  bi, RBBIMonkeyKind mk, String name, int  seed, int
     //  Debugging settings.  Comment out everything in the following block for normal operation
     //
     //--------------------------------------------------------------------------------------------
-    // numIterations = -1;  
+    // numIterations = -1;
     // RuleBasedBreakIterator_New.fTrace = true;
     // m_seed = 859056465;
     // TESTSTRINGLEN = 50;
     // printTestData = true;
     // printBreaksFromBI = true;
     // ((RuleBasedBreakIterator_New)bi).dump();
-    
+
     //--------------------------------------------------------------------------------------------
     //
-    //  End of Debugging settings.  
+    //  End of Debugging settings.
     //
     //--------------------------------------------------------------------------------------------
-    
+
     int  dotsOnLine = 0;
      while (loopCount < numIterations || numIterations == -1) {
         if (numIterations == -1 && loopCount % 10 == 0) {
@@ -1720,7 +1828,7 @@ void RunMonkey(BreakIterator  bi, RBBIMonkeyKind mk, String name, int  seed, int
         testText.setLength(0);
         // Populate a test string with data.
         if (printTestData) {
-            System.out.println("Test Data string ..."); 
+            System.out.println("Test Data string ...");
         }
         for (i=0; i<TESTSTRINGLEN; i++) {
             int        aClassNum = m_rand() % numCharClasses;
@@ -1736,7 +1844,7 @@ void RunMonkey(BreakIterator  bi, RBBIMonkeyKind mk, String name, int  seed, int
             }
         }
         if (printTestData) {
-            System.out.println(); 
+            System.out.println();
         }
 
         Arrays.fill(expected, 0);
@@ -1746,7 +1854,7 @@ void RunMonkey(BreakIterator  bi, RBBIMonkeyKind mk, String name, int  seed, int
         Arrays.fill(isBoundaryBreaks, false);
         Arrays.fill(followingBreaks, false);
         Arrays.fill(precedingBreaks, false);
- 
+
         // Calculate the expected results for this test string.
         mk.setText(testText);
         expectedCount = 0;
@@ -1773,7 +1881,7 @@ void RunMonkey(BreakIterator  bi, RBBIMonkeyKind mk, String name, int  seed, int
 
         // Find the break positions using forward iteration
         if (printBreaksFromBI) {
-            System.out.println("Breaks from BI...");  
+            System.out.println("Breaks from BI...");
         }
         bi.setText(testText.toString());
         for (i=bi.first(); i != BreakIterator.DONE; i=bi.next()) {
@@ -1823,7 +1931,7 @@ void RunMonkey(BreakIterator  bi, RBBIMonkeyKind mk, String name, int  seed, int
                 lastBreakPos = breakPos;
             }
         }
-        
+
         // Find the break positions using the preceding() function.
         lastBreakPos = testText.length();
         precedingBreaks[testText.length()] = true;
@@ -1844,7 +1952,7 @@ void RunMonkey(BreakIterator  bi, RBBIMonkeyKind mk, String name, int  seed, int
             }
         }
 
-        
+
 
         // Compare the expected and actual results.
         for (i=0; i<=testText.length(); i++) {
@@ -1884,7 +1992,7 @@ void RunMonkey(BreakIterator  bi, RBBIMonkeyKind mk, String name, int  seed, int
                 for (ci=0; ci<2; ci++) {  // Number of items to include in error text.
                     for (;;) {
                         if (endContext >= testText.length()) {break;}
-                        if (expectedBreaks[endContext-1]) { 
+                        if (expectedBreaks[endContext-1]) {
                             if (count == 0) break;
                             count --;
                         }
@@ -1910,7 +2018,7 @@ void RunMonkey(BreakIterator  bi, RBBIMonkeyKind mk, String name, int  seed, int
                         String gc = UCharacter.getPropertyValueName(UProperty.GENERAL_CATEGORY, UCharacter.getType(c), UProperty.NameChoice.SHORT);
                         appendToBuf(errorText, gc, 8);
                         int extraProp = UCharacter.getIntPropertyValue(c, mk.fCharProperty);
-                        String extraPropValue = 
+                        String extraPropValue =
                             UCharacter.getPropertyValueName(mk.fCharProperty, extraProp, UProperty.NameChoice.LONG);
                         appendToBuf(errorText, extraPropValue, 20);
 
@@ -1925,7 +2033,7 @@ void RunMonkey(BreakIterator  bi, RBBIMonkeyKind mk, String name, int  seed, int
                 errorText.append("</data>\n");
 
                 // Output the error
-                errln(name + " break monkey test error.  " + 
+                errln(name + " break monkey test error.  " +
                      (expectedBreaks[i]? "Break expected but not found." : "Break found but not expected.") +
                       "\nOperation = " + errorType + "; random seed = " + seed + ";  buf Idx = " + i + "\n" +
                       errorText);
@@ -1938,28 +2046,28 @@ void RunMonkey(BreakIterator  bi, RBBIMonkeyKind mk, String name, int  seed, int
 }
 
 public void TestCharMonkey() {
-    
+
     int        loopCount = 500;
     int        seed      = 1;
-    
+
     if (params.inclusion >= 9) {
         loopCount = 10000;
     }
-    
+
     RBBICharMonkey  m = new RBBICharMonkey();
     BreakIterator   bi = BreakIterator.getCharacterInstance(Locale.US);
     RunMonkey(bi, m, "char", seed, loopCount);
 }
 
 public void TestWordMonkey() {
-    
+
     int        loopCount = 500;
     int        seed      = 1;
-    
+
     if (params.inclusion >= 9) {
         loopCount = 10000;
     }
-    
+
     logln("Word Break Monkey Test");
     RBBIWordMonkey  m = new RBBIWordMonkey();
     BreakIterator   bi = BreakIterator.getWordInstance(Locale.US);
@@ -1969,11 +2077,11 @@ public void TestWordMonkey() {
 public void TestLineMonkey() {
     int        loopCount = 500;
     int        seed      = 1;
-    
+
     if (params.inclusion >= 9) {
         loopCount = 10000;
     }
-    
+
     logln("Line Break Monkey Test");
     RBBILineMonkey  m = new RBBILineMonkey();
     BreakIterator   bi = BreakIterator.getLineInstance(Locale.US);
@@ -1984,14 +2092,14 @@ public void TestLineMonkey() {
 }
 
 public void TestSentMonkey() {
-    
+
     int        loopCount = 500;
     int        seed      = 1;
-    
+
     if (params.inclusion >= 9) {
         loopCount = 3000;
     }
-    
+
     logln("Sentence Break Monkey Test");
     RBBISentenceMonkey  m = new RBBISentenceMonkey();
     BreakIterator   bi = BreakIterator.getSentenceInstance(Locale.US);
@@ -2011,14 +2119,14 @@ public void TestSentMonkey() {
 //  rebuild break iterators from the original source rules.
 //
 public void TestRTCharMonkey() {
-    
+
     int        loopCount = 200;
     int        seed      = 1;
-    
+
     if (params.inclusion >= 9) {
         loopCount = 2000;
     }
-    
+
     RBBICharMonkey  m = new RBBICharMonkey();
     BreakIterator   bi = BreakIterator.getCharacterInstance(Locale.US);
     String rules = bi.toString();
@@ -2027,10 +2135,10 @@ public void TestRTCharMonkey() {
 }
 
 public void TestRTWordMonkey() {
-    
+
     int        loopCount = 200;
     int        seed      = 1;
-    
+
     if (params.inclusion >= 9) {
         loopCount = 2000;
     }
@@ -2045,11 +2153,11 @@ public void TestRTWordMonkey() {
 public void TestRTLineMonkey() {
     int        loopCount = 200;
     int        seed      = 1;
-    
+
     if (params.inclusion >= 9) {
         loopCount = 2000;
     }
-    
+
     logln("Line Break Monkey Test");
     RBBILineMonkey  m = new RBBILineMonkey();
     BreakIterator   bi = BreakIterator.getLineInstance(Locale.US);
@@ -2062,14 +2170,14 @@ public void TestRTLineMonkey() {
 }
 
 public void TestRTSentMonkey() {
-    
+
     int        loopCount = 200;
     int        seed      = 1;
-    
+
     if (params.inclusion >= 9) {
         loopCount = 1000;
     }
-    
+
     logln("Sentence Break Monkey Test");
     RBBISentenceMonkey  m = new RBBISentenceMonkey();
     BreakIterator   bi = BreakIterator.getSentenceInstance(Locale.US);
-- 
2.40.0