From 50fc9ce8784a5fc79fa844b5588d10348b86ef52 Mon Sep 17 00:00:00 2001
From: Andy Heninger <andy.heninger@gmail.com>
Date: Fri, 18 May 2018 17:47:03 +0000
Subject: [PATCH] ICU-13770 Line Break Rules for Unicode 11, tailorings
 updated.

X-SVN-Rev: 41410
---
 icu4c/source/data/brkitr/rules/line_fi.txt    | 26 ++++++--------
 icu4c/source/data/brkitr/rules/line_loose.txt | 26 ++++++--------
 .../data/brkitr/rules/line_loose_cj.txt       | 26 ++++++--------
 .../data/brkitr/rules/line_loose_fi.txt       | 26 ++++++--------
 .../source/data/brkitr/rules/line_normal.txt  | 26 ++++++--------
 .../data/brkitr/rules/line_normal_cj.txt      | 26 ++++++--------
 .../data/brkitr/rules/line_normal_fi.txt      | 26 ++++++--------
 .../test/testdata/break_rules/line_loose.txt  | 36 ++++++++++---------
 .../testdata/break_rules/line_loose_cj.txt    | 36 ++++++++++---------
 .../test/testdata/break_rules/line_normal.txt | 36 ++++++++++---------
 .../testdata/break_rules/line_normal_cj.txt   | 36 ++++++++++---------
 11 files changed, 146 insertions(+), 180 deletions(-)

diff --git a/icu4c/source/data/brkitr/rules/line_fi.txt b/icu4c/source/data/brkitr/rules/line_fi.txt
index 82d8cc202db..92eefffd91b 100644
--- a/icu4c/source/data/brkitr/rules/line_fi.txt
+++ b/icu4c/source/data/brkitr/rules/line_fi.txt
@@ -7,13 +7,10 @@
 #
 #         Line Breaking Rules
 #         Implement default line breaking as defined by
-#         Unicode Standard Annex #14 Revision 37 for Unicode 9.0
+#         Unicode Standard Annex #14 Revision 40 for Unicode 11.0
 #         http://www.unicode.org/reports/tr14/
 #         tailored as noted in 2nd paragraph below.
 #
-#         Includes extensions to the handling of emoji ZWJ sequences from
-#         https://goo.gl/cluFCn
-#
 #         TODO:  Rule LB 8 remains as it was in Unicode 5.2
 #         This is only because of a limitation of ICU break engine implementation,
 #         not because the older behavior is desirable.
@@ -78,9 +75,6 @@ $XX = [:LineBreak =  Unknown:];
 $ZW = [:LineBreak =  ZWSpace:];
 $ZWJ = [:LineBreak = ZWJ:];
 
-$EmojiNRK = [[\p{Emoji}] - [$RI \u002a\u00230-9©®™〰〽]];
-$Extended_Pict = [:ExtPict:];
-
 # By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
 #         list it in the numerous rules that use CM.
 # By LB1, SA characters with general categor of Mn or Mc also resolve to CM.
@@ -151,9 +145,9 @@ $CAN_CM $CM*  [$SP $ZW];
 $LB8Breaks    = [$LB4Breaks $ZW];
 $LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];
 
-# LB 8a        ZWJ x (ID | Extended_Pict | EmojiNRK)   Emoji ZWJ sequences.
+# LB 8a        ZWJ x            Do not break Emoji ZWJ sequences.
 #
-$ZWJ ($ID | $Extended_Pict | $EmojiNRK);
+$ZWJ [^$CM];
 
 # LB 9     Combining marks.      X   $CM needs to behave like X, where X is not $SP, $BK $CR $LF $NL
 #                                $CM not covered by the above needs to behave like $AL
@@ -330,13 +324,13 @@ $IS $CM* ($ALPlus | $HL);
 $CP $CM* ($ALPlus | $HL | $NU);
 
 # LB 30a  Do not break between regional indicators. Break after pairs of them.
-#         Tricky interaction with LB8a: ZWJ x ID
-$RI $CM* $RI                / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM] {eof}];
-$RI $CM* $RI $CM*  $ZWJ     / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM $ID $Extended_Pict $EmojiNRK] {eof}];
-$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM] {eof}];
-
-$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS {eof}];
-$RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
+#         Tricky interaction with LB8a: ZWJ x .   together with ZWJ acting like a CM.
+$RI $CM* $RI                 / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM]];
+$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM]];
+$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $ZWJ {eof}];
+# note: the preceding rule includes {eof} rather than having the last [set] term qualified with '?'
+#       because of the chain-out behavior difference. The rule must chain out only from the [set characters],
+#       not from the preceding $RI or $CM, which it would be able to do if the set were optional.
 
 # LB 30b Do not break between an Emoji Base and an Emoji Modifier
 $EB $CM* $EM;
diff --git a/icu4c/source/data/brkitr/rules/line_loose.txt b/icu4c/source/data/brkitr/rules/line_loose.txt
index 2e937808b78..5e8f7a16703 100644
--- a/icu4c/source/data/brkitr/rules/line_loose.txt
+++ b/icu4c/source/data/brkitr/rules/line_loose.txt
@@ -8,12 +8,9 @@
 #
 #         Line Breaking Rules
 #         Implement default line breaking as defined by
-#         Unicode Standard Annex #14 Revision 37 for Unicode 9.0
+#         Unicode Standard Annex #14 Revision 40 for Unicode 11.0
 #         http://www.unicode.org/reports/tr14/
 #
-#         Includes extensions to the handling of emoji ZWJ sequences from
-#         https://goo.gl/cluFCn
-#
 #         tailored as noted in 2nd paragraph below.
 #
 #         TODO:  Rule LB 8 remains as it was in Unicode 5.2
@@ -81,9 +78,6 @@ $XX = [:LineBreak =  Unknown:];
 $ZW = [:LineBreak =  ZWSpace:];
 $ZWJ = [:LineBreak = ZWJ:];
 
-$EmojiNRK = [[\p{Emoji}] - [$RI \u002a\u00230-9©®™〰〽]];
-$Extended_Pict = [:ExtPict:];
-
 # By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
 #         list it in the numerous rules that use CM.
 # By LB1, SA characters with general categor of Mn or Mc also resolve to CM.
@@ -154,9 +148,9 @@ $CAN_CM $CM*  [$SP $ZW];
 $LB8Breaks    = [$LB4Breaks $ZW];
 $LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];
 
-# LB 8a        ZWJ x (ID | Extended_Pict | EmojiNRK)   Emoji ZWJ sequences.
+# LB 8a        ZWJ x            Do not break Emoji ZWJ sequences.
 #
-$ZWJ ($ID | $Extended_Pict | $EmojiNRK);
+$ZWJ [^$CM];
 
 # LB 9     Combining marks.      X   $CM needs to behave like X, where X is not $SP, $BK $CR $LF $NL
 #                                $CM not covered by the above needs to behave like $AL
@@ -333,13 +327,13 @@ $IS $CM* ($ALPlus | $HL);
 $CP $CM* ($ALPlus | $HL | $NU);
 
 # LB 30a  Do not break between regional indicators. Break after pairs of them.
-#         Tricky interaction with LB8a: ZWJ x ID
-$RI $CM* $RI                / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM] {eof}];
-$RI $CM* $RI $CM*  $ZWJ     / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM $ID $Extended_Pict $EmojiNRK] {eof}];
-$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM] {eof}];
-
-$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS {eof}];
-$RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
+#         Tricky interaction with LB8a: ZWJ x .   together with ZWJ acting like a CM.
+$RI $CM* $RI                 / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM]];
+$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM]];
+$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $ZWJ {eof}];
+# note: the preceding rule includes {eof} rather than having the last [set] term qualified with '?'
+#       because of the chain-out behavior difference. The rule must chain out only from the [set characters],
+#       not from the preceding $RI or $CM, which it would be able to do if the set were optional.
 
 # LB 30b Do not break between an Emoji Base and an Emoji Modifier
 $EB $CM* $EM;
diff --git a/icu4c/source/data/brkitr/rules/line_loose_cj.txt b/icu4c/source/data/brkitr/rules/line_loose_cj.txt
index 1f9dbcac85e..651924120a9 100644
--- a/icu4c/source/data/brkitr/rules/line_loose_cj.txt
+++ b/icu4c/source/data/brkitr/rules/line_loose_cj.txt
@@ -7,12 +7,9 @@
 #
 #         Line Breaking Rules
 #         Implement default line breaking as defined by
-#         Unicode Standard Annex #14 Revision 37 for Unicode 9.0
+#         Unicode Standard Annex #14 Revision 40 for Unicode 11.0
 #         http://www.unicode.org/reports/tr14/
 #
-#         Includes extensions to the handling of emoji ZWJ sequences from
-#         https://goo.gl/cluFCn
-#
 #         tailored as noted in 2nd paragraph below.
 #
 #         TODO:  Rule LB 8 remains as it was in Unicode 5.2
@@ -91,9 +88,6 @@ $XX = [:LineBreak =  Unknown:];
 $ZW = [:LineBreak =  ZWSpace:];
 $ZWJ = [:LineBreak = ZWJ:];
 
-$EmojiNRK = [[\p{Emoji}] - [$RI \u002a\u00230-9©®™〰〽]];
-$Extended_Pict = [:ExtPict:];
-
 # By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
 #         list it in the numerous rules that use CM.
 # By LB1, SA characters with general categor of Mn or Mc also resolve to CM.
@@ -164,9 +158,9 @@ $CAN_CM $CM*  [$SP $ZW];
 $LB8Breaks    = [$LB4Breaks $ZW];
 $LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];
 
-# LB 8a        ZWJ x (ID | Extended_Pict | EmojiNRK)   Emoji ZWJ sequences.
+# LB 8a        ZWJ x            Do not break Emoji ZWJ sequences.
 #
-$ZWJ ($ID | $Extended_Pict | $EmojiNRK);
+$ZWJ [^$CM];
 
 # LB 9     Combining marks.      X   $CM needs to behave like X, where X is not $SP, $BK $CR $LF $NL
 #                                $CM not covered by the above needs to behave like $AL
@@ -347,13 +341,13 @@ $IS $CM* ($ALPlus | $HL);
 $CP $CM* ($ALPlus | $HL | $NU);
 
 # LB 30a  Do not break between regional indicators. Break after pairs of them.
-#         Tricky interaction with LB8a: ZWJ x ID
-$RI $CM* $RI                / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM] {eof}];
-$RI $CM* $RI $CM*  $ZWJ     / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM $ID $Extended_Pict $EmojiNRK] {eof}];
-$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM] {eof}];
-
-$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS {eof}];
-$RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
+#         Tricky interaction with LB8a: ZWJ x .   together with ZWJ acting like a CM.
+$RI $CM* $RI                 / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM]];
+$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM]];
+$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $ZWJ {eof}];
+# note: the preceding rule includes {eof} rather than having the last [set] term qualified with '?'
+#       because of the chain-out behavior difference. The rule must chain out only from the [set characters],
+#       not from the preceding $RI or $CM, which it would be able to do if the set were optional.
 
 # LB 30b Do not break between an Emoji Base and an Emoji Modifier
 $EB $CM* $EM;
diff --git a/icu4c/source/data/brkitr/rules/line_loose_fi.txt b/icu4c/source/data/brkitr/rules/line_loose_fi.txt
index f1161d1a6ef..3775b7f9ac9 100644
--- a/icu4c/source/data/brkitr/rules/line_loose_fi.txt
+++ b/icu4c/source/data/brkitr/rules/line_loose_fi.txt
@@ -7,13 +7,10 @@
 #
 #         Line Breaking Rules
 #         Implement default line breaking as defined by
-#         Unicode Standard Annex #14 Revision 37 for Unicode 9.0
+#         Unicode Standard Annex #14 Revision 40 for Unicode 11.0
 #         http://www.unicode.org/reports/tr14/
 #         tailored as noted in 3rd paragraph below.
 #
-#         Includes extensions to the handling of emoji ZWJ sequences from
-#         https://goo.gl/cluFCn
-#
 #         TODO:  Rule LB 8 remains as it was in Unicode 5.2
 #         This is only because of a limitation of ICU break engine implementation,
 #         not because the older behavior is desirable.
@@ -77,9 +74,6 @@ $XX = [:LineBreak =  Unknown:];
 $ZW = [:LineBreak =  ZWSpace:];
 $ZWJ = [:LineBreak = ZWJ:];
 
-$EmojiNRK = [[\p{Emoji}] - [$RI \u002a\u00230-9©®™〰〽]];
-$Extended_Pict = [:ExtPict:];
-
 # By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
 #         list it in the numerous rules that use CM.
 # By LB1, SA characters with general categor of Mn or Mc also resolve to CM.
@@ -150,9 +144,9 @@ $CAN_CM $CM*  [$SP $ZW];
 $LB8Breaks    = [$LB4Breaks $ZW];
 $LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];
 
-# LB 8a        ZWJ x (ID | Extended_Pict | EmojiNRK)   Emoji ZWJ sequences.
+# LB 8a        ZWJ x            Do not break Emoji ZWJ sequences.
 #
-$ZWJ ($ID | $Extended_Pict | $EmojiNRK);
+$ZWJ [^$CM];
 
 # LB 9     Combining marks.      X   $CM needs to behave like X, where X is not $SP, $BK $CR $LF $NL
 #                                $CM not covered by the above needs to behave like $AL
@@ -332,13 +326,13 @@ $IS $CM* ($ALPlus | $HL);
 $CP $CM* ($ALPlus | $HL | $NU);
 
 # LB 30a  Do not break between regional indicators. Break after pairs of them.
-#         Tricky interaction with LB8a: ZWJ x ID
-$RI $CM* $RI                / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM] {eof}];
-$RI $CM* $RI $CM*  $ZWJ     / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM $ID $Extended_Pict $EmojiNRK] {eof}];
-$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM] {eof}];
-
-$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS {eof}];
-$RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
+#         Tricky interaction with LB8a: ZWJ x .   together with ZWJ acting like a CM.
+$RI $CM* $RI                 / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM]];
+$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM]];
+$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $ZWJ {eof}];
+# note: the preceding rule includes {eof} rather than having the last [set] term qualified with '?'
+#       because of the chain-out behavior difference. The rule must chain out only from the [set characters],
+#       not from the preceding $RI or $CM, which it would be able to do if the set were optional.
 
 # LB 30b Do not break between an Emoji Base and an Emoji Modifier
 $EB $CM* $EM;
diff --git a/icu4c/source/data/brkitr/rules/line_normal.txt b/icu4c/source/data/brkitr/rules/line_normal.txt
index 708082dd261..5bfbe8e8d9e 100644
--- a/icu4c/source/data/brkitr/rules/line_normal.txt
+++ b/icu4c/source/data/brkitr/rules/line_normal.txt
@@ -7,12 +7,9 @@
 #
 #         Line Breaking Rules
 #         Implement default line breaking as defined by
-#         Unicode Standard Annex #14 Revision 37 for Unicode 9.0
+#         Unicode Standard Annex #14 Revision 40 for Unicode 11.0
 #         http://www.unicode.org/reports/tr14/
 #
-#         Includes extensions to the handling of emoji ZWJ sequences from
-#         https://goo.gl/cluFCn
-#
 #         tailored as noted in 2nd paragraph below.
 #
 #         TODO:  Rule LB 8 remains as it was in Unicode 5.2
@@ -76,9 +73,6 @@ $XX = [:LineBreak =  Unknown:];
 $ZW = [:LineBreak =  ZWSpace:];
 $ZWJ = [:LineBreak = ZWJ:];
 
-$EmojiNRK = [[\p{Emoji}] - [$RI \u002a\u00230-9©®™〰〽]];
-$Extended_Pict = [:ExtPict:];
-
 # By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
 #         list it in the numerous rules that use CM.
 # By LB1, SA characters with general categor of Mn or Mc also resolve to CM.
@@ -149,9 +143,9 @@ $CAN_CM $CM*  [$SP $ZW];
 $LB8Breaks    = [$LB4Breaks $ZW];
 $LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];
 
-# LB 8a        ZWJ x (ID | Extended_Pict | EmojiNRK)   Emoji ZWJ sequences.
+# LB 8a        ZWJ x            Do not break Emoji ZWJ sequences.
 #
-$ZWJ ($ID | $Extended_Pict | $EmojiNRK);
+$ZWJ [^$CM];
 
 # LB 9     Combining marks.      X   $CM needs to behave like X, where X is not $SP, $BK $CR $LF $NL
 #                                $CM not covered by the above needs to behave like $AL
@@ -325,13 +319,13 @@ $IS $CM* ($ALPlus | $HL);
 $CP $CM* ($ALPlus | $HL | $NU);
 
 # LB 30a  Do not break between regional indicators. Break after pairs of them.
-#         Tricky interaction with LB8a: ZWJ x ID
-$RI $CM* $RI                / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM] {eof}];
-$RI $CM* $RI $CM*  $ZWJ     / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM $ID $Extended_Pict $EmojiNRK] {eof}];
-$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM] {eof}];
-
-$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS {eof}];
-$RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
+#         Tricky interaction with LB8a: ZWJ x .   together with ZWJ acting like a CM.
+$RI $CM* $RI                 / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM]];
+$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM]];
+$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $ZWJ {eof}];
+# note: the preceding rule includes {eof} rather than having the last [set] term qualified with '?'
+#       because of the chain-out behavior difference. The rule must chain out only from the [set characters],
+#       not from the preceding $RI or $CM, which it would be able to do if the set were optional.
 
 # LB 30b Do not break between an Emoji Base and an Emoji Modifier
 $EB $CM* $EM;
diff --git a/icu4c/source/data/brkitr/rules/line_normal_cj.txt b/icu4c/source/data/brkitr/rules/line_normal_cj.txt
index 58aa2fc9d80..4576b205e15 100644
--- a/icu4c/source/data/brkitr/rules/line_normal_cj.txt
+++ b/icu4c/source/data/brkitr/rules/line_normal_cj.txt
@@ -7,12 +7,9 @@
 #
 #         Line Breaking Rules
 #         Implement default line breaking as defined by
-#         Unicode Standard Annex #14 Revision 37 for Unicode 9.0
+#         Unicode Standard Annex #14 Revision 40 for Unicode 11.0
 #         http://www.unicode.org/reports/tr14/
 #
-#         Includes extensions to the handling of emoji ZWJ sequences from
-#         https://goo.gl/cluFCn
-#
 #         tailored as noted in 2nd paragraph below.
 #
 #         TODO:  Rule LB 8 remains as it was in Unicode 5.2
@@ -79,9 +76,6 @@ $XX = [:LineBreak =  Unknown:];
 $ZW = [:LineBreak =  ZWSpace:];
 $ZWJ = [:LineBreak = ZWJ:];
 
-$EmojiNRK = [[\p{Emoji}] - [$RI \u002a\u00230-9©®™〰〽]];
-$Extended_Pict = [:ExtPict:];
-
 # By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
 #         list it in the numerous rules that use CM.
 # By LB1, SA characters with general categor of Mn or Mc also resolve to CM.
@@ -152,9 +146,9 @@ $CAN_CM $CM*  [$SP $ZW];
 $LB8Breaks    = [$LB4Breaks $ZW];
 $LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];
 
-# LB 8a        ZWJ x (ID | Extended_Pict | EmojiNRK)   Emoji ZWJ sequences.
+# LB 8a        ZWJ x            Do not break Emoji ZWJ sequences.
 #
-$ZWJ ($ID | $Extended_Pict | $EmojiNRK);
+$ZWJ [^$CM];
 
 # LB 9     Combining marks.      X   $CM needs to behave like X, where X is not $SP, $BK $CR $LF $NL
 #                                $CM not covered by the above needs to behave like $AL
@@ -331,13 +325,13 @@ $IS $CM* ($ALPlus | $HL);
 $CP $CM* ($ALPlus | $HL | $NU);
 
 # LB 30a  Do not break between regional indicators. Break after pairs of them.
-#         Tricky interaction with LB8a: ZWJ x ID
-$RI $CM* $RI                / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM] {eof}];
-$RI $CM* $RI $CM*  $ZWJ     / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM $ID $Extended_Pict $EmojiNRK] {eof}];
-$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM] {eof}];
-
-$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS {eof}];
-$RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
+#         Tricky interaction with LB8a: ZWJ x .   together with ZWJ acting like a CM.
+$RI $CM* $RI                 / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM]];
+$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM]];
+$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $ZWJ {eof}];
+# note: the preceding rule includes {eof} rather than having the last [set] term qualified with '?'
+#       because of the chain-out behavior difference. The rule must chain out only from the [set characters],
+#       not from the preceding $RI or $CM, which it would be able to do if the set were optional.
 
 # LB 30b Do not break between an Emoji Base and an Emoji Modifier
 $EB $CM* $EM;
diff --git a/icu4c/source/data/brkitr/rules/line_normal_fi.txt b/icu4c/source/data/brkitr/rules/line_normal_fi.txt
index d6f36bb1e2e..b5efc152aea 100644
--- a/icu4c/source/data/brkitr/rules/line_normal_fi.txt
+++ b/icu4c/source/data/brkitr/rules/line_normal_fi.txt
@@ -7,13 +7,10 @@
 #
 #         Line Breaking Rules
 #         Implement default line breaking as defined by
-#         Unicode Standard Annex #14 Revision 37 for Unicode 9.0
+#         Unicode Standard Annex #14 Revision 40 for Unicode 11.0
 #         http://www.unicode.org/reports/tr14/
 #         tailored as noted in 3rd paragraph below.
 #
-#         Includes extensions to the handling of emoji ZWJ sequences from
-#         https://goo.gl/cluFCn
-#
 #         TODO:  Rule LB 8 remains as it was in Unicode 5.2
 #         This is only because of a limitation of ICU break engine implementation,
 #         not because the older behavior is desirable.
@@ -76,9 +73,6 @@ $XX = [:LineBreak =  Unknown:];
 $ZW = [:LineBreak =  ZWSpace:];
 $ZWJ = [:LineBreak = ZWJ:];
 
-$EmojiNRK = [[\p{Emoji}] - [$RI \u002a\u00230-9©®™〰〽]];
-$Extended_Pict = [:ExtPict:];
-
 # By LB9, a ZWJ also behaves as a CM. Including it in the definition of CM avoids having to explicitly
 #         list it in the numerous rules that use CM.
 # By LB1, SA characters with general categor of Mn or Mc also resolve to CM.
@@ -149,9 +143,9 @@ $CAN_CM $CM*  [$SP $ZW];
 $LB8Breaks    = [$LB4Breaks $ZW];
 $LB8NonBreaks = [[$LB4NonBreaks] - [$ZW]];
 
-# LB 8a        ZWJ x (ID | Extended_Pict | EmojiNRK)   Emoji ZWJ sequences.
+# LB 8a        ZWJ x            Do not break Emoji ZWJ sequences.
 #
-$ZWJ ($ID | $Extended_Pict | $EmojiNRK);
+$ZWJ [^$CM];
 
 # LB 9     Combining marks.      X   $CM needs to behave like X, where X is not $SP, $BK $CR $LF $NL
 #                                $CM not covered by the above needs to behave like $AL
@@ -328,13 +322,13 @@ $IS $CM* ($ALPlus | $HL);
 $CP $CM* ($ALPlus | $HL | $NU);
 
 # LB 30a  Do not break between regional indicators. Break after pairs of them.
-#         Tricky interaction with LB8a: ZWJ x ID
-$RI $CM* $RI                / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM] {eof}];
-$RI $CM* $RI $CM*  $ZWJ     / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM $ID $Extended_Pict $EmojiNRK] {eof}];
-$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM] {eof}];
-
-$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS {eof}];
-$RI $CM* $RI $CM* $ZWJ ($ID | $Extended_Pict | $EmojiNRK);
+#         Tricky interaction with LB8a: ZWJ x .   together with ZWJ acting like a CM.
+$RI $CM* $RI                 / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM]];
+$RI $CM* $RI $CM* [$CM-$ZWJ] / [[^$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $CM]];
+$RI $CM* $RI $CM* [$BK $CR $LF $NL $SP $ZW $WJ $CL $CP $EX $IS $SY $GL $QU $BA $HY $NS $ZWJ {eof}];
+# note: the preceding rule includes {eof} rather than having the last [set] term qualified with '?'
+#       because of the chain-out behavior difference. The rule must chain out only from the [set characters],
+#       not from the preceding $RI or $CM, which it would be able to do if the set were optional.
 
 # LB 30b Do not break between an Emoji Base and an Emoji Modifier
 $EB $CM* $EM;
diff --git a/icu4c/source/test/testdata/break_rules/line_loose.txt b/icu4c/source/test/testdata/break_rules/line_loose.txt
index 45de73b5eb9..00552fec7c0 100644
--- a/icu4c/source/test/testdata/break_rules/line_loose.txt
+++ b/icu4c/source/test/testdata/break_rules/line_loose.txt
@@ -5,7 +5,8 @@
 #
 #  file:  line_loose.txt
 #
-# Reference Line Break rules for intltest rbbi/RBBIMonkeyTest
+# Reference Line Break rules for intltest rbbi/RBBIMonkeyTest.
+# Rules derived from Unicode Standard Annex #14 Revision 40 for Unicode 11.0
 #
 # Note: Rule syntax and the monkey test itself are still a work in progress.
 #       They are expected to change with review and the addition of support for rule tailoring.
@@ -67,9 +68,6 @@ XX = [:LineBreak =  Unknown:];
 ZW = [:LineBreak =  ZWSpace:];
 ZWJ = [:LineBreak =  ZWJ:];
 
-EmojiNRK = [[\p{Emoji}] - [[RI]\u002a\u00230-9©®™〰〽]];
-Extended_Pict = [:ExtPict:];
-
 # LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
 AL = [AL AI SG XX ];
 dictionary = SA;
@@ -105,8 +103,10 @@ LB7.2:      [ZW SP] [SP ZW];
 LB8:        ZW ÷;
 
 # LB8a
-#      ZWJ x (ID | Extended_Pict | EmojiNRK)
-LB8a:       ZWJ (ID | Extended_Pict | EmojiNRK);
+#      ZWJ x
+#      Don't match a CM on the right - let other rules pick up CM sequences, where
+#      the ZWJ behaves as just another generic CM.
+LB8a:       ZWJ [^CM];
 
 
 # LB9:  X CM -> X
@@ -115,7 +115,7 @@ LB8a:       ZWJ (ID | Extended_Pict | EmojiNRK);
 #LB11:       × WJ;
 #            WJ ×
 
-LB11.1:      [^BK CR LF NL SP ZW] CM* WJ;
+LB11.1:      [^SP] CM* WJ;
 LB11.2:      SP WJ;
 LB11.3:      WJ CM* [^CM];
 
@@ -141,12 +141,14 @@ LB19:        . CM* QU;
 LB19.1:      QU CM* [^CM];
 
 # LB 20   Break before and after CB.
-#         Interaction with LB8a:  ZWJ x ID is tricky because CM includes ZWJ.
+#         Interaction with LB8a:  ZWJ x . is tricky because CM includes ZWJ.
 #                                 ZWJ acts like a CM to the left, combining with CB.
-#                                 ZWJ acts independently to the right, no break from ID by LB8a.
-LB20:        . CM* ÷ CB;
-LB20.1a:     CB CM* ZWJ (ID | Extended_Pict | EmojiNRK);
-LB20.1b:      CB CM* ÷;
+#                                 ZWJ acts independently to the right, no break after by LB8a.
+LB20.1:      . CM* ZWJ CB;
+LB20.2:      . CM* ÷ CB;
+
+LB20.3:      CB CM* ZWJ [^CM];
+LB20.4:      CB CM* ÷;
 
 # Note: Rule 21a must come before 21 to prevent 21.1 from matching HL BA, then
 #       not picking up the continuing match after the BA from 21a.
@@ -193,15 +195,15 @@ LB29:        IS CM* (AL | HL);
 LB30.1:      (AL | CM | HL | NU) CM* OP;
 LB30.2:      CP CM* (AL | HL | NU);
 
-# LB31  keep pairs of RI together.
-LB30a.1:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS];
-LB30a.2:     RI CM* RI CM* ZWJ (ID | Extended_Pict | EmojiNRK);
-LB30a.3:     RI CM* RI CM* ÷;
+# LB30a  keep pairs of RI together.
+LB30a.1:     RI CM* RI              ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
+LB30a.2:     RI CM* RI CM* [CM-ZWJ] ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
+LB30a.3:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS ZWJ]?;
 
 # LB30b Do not break between Emoji Base and Emoji Modifier
 LB30b:       EB CM* EM;
 
 # LB31 Break Everywhere Else.
 #      Include combining marks
-LB31.1:        . CM* ZWJ (ID | Extended_Pict | EmojiNRK);
+LB31.1:        . CM* ZWJ [^CM];
 LB31.2:        . CM* ÷;
diff --git a/icu4c/source/test/testdata/break_rules/line_loose_cj.txt b/icu4c/source/test/testdata/break_rules/line_loose_cj.txt
index 63a244e22cd..59c21fd3d9a 100644
--- a/icu4c/source/test/testdata/break_rules/line_loose_cj.txt
+++ b/icu4c/source/test/testdata/break_rules/line_loose_cj.txt
@@ -5,7 +5,8 @@
 #
 #  file:  line_loose_cj.txt
 #
-# Reference Line Break rules for intltest rbbi/RBBIMonkeyTest
+# Reference Line Break rules for intltest rbbi/RBBIMonkeyTest.
+# Rules derived from Unicode Standard Annex #14 Revision 40 for Unicode 11.0
 #
 # Note: Rule syntax and the monkey test itself are still a work in progress.
 #       They are expected to change with review and the addition of support for rule tailoring.
@@ -84,9 +85,6 @@ XX = [:LineBreak =  Unknown:];
 ZW = [:LineBreak =  ZWSpace:];
 ZWJ = [:LineBreak =  ZWJ:];
 
-EmojiNRK = [[\p{Emoji}] - [[RI]\u002a\u00230-9©®™〰〽]];
-Extended_Pict = [:ExtPict:];
-
 # LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
 AL = [AL AI SG XX ];
 dictionary = SA;
@@ -122,8 +120,10 @@ LB7.2:      [ZW SP] [SP ZW];
 LB8:        ZW ÷;
 
 # LB8a
-#      ZWJ x (ID | Extended_Pict | EmojiNRK)
-LB8a:       ZWJ (ID | Extended_Pict | EmojiNRK);
+#      ZWJ x
+#      Don't match a CM on the right - let other rules pick up CM sequences, where
+#      the ZWJ behaves as just another generic CM.
+LB8a:       ZWJ [^CM];
 
 
 # LB9:  X CM -> X
@@ -132,7 +132,7 @@ LB8a:       ZWJ (ID | Extended_Pict | EmojiNRK);
 #LB11:       × WJ;
 #            WJ ×
 
-LB11.1:      [^BK CR LF NL SP ZW] CM* WJ;
+LB11.1:      [^SP] CM* WJ;
 LB11.2:      SP WJ;
 LB11.3:      WJ CM* [^CM];
 
@@ -158,12 +158,14 @@ LB19:        . CM* QU;
 LB19.1:      QU CM* [^CM];
 
 # LB 20   Break before and after CB.
-#         Interaction with LB8a:  ZWJ x ID is tricky because CM includes ZWJ.
+#         Interaction with LB8a:  ZWJ x . is tricky because CM includes ZWJ.
 #                                 ZWJ acts like a CM to the left, combining with CB.
-#                                 ZWJ acts independently to the right, no break from ID by LB8a.
-LB20:        . CM* ÷ CB;
-LB20.1a:     CB CM* ZWJ (ID | Extended_Pict | EmojiNRK);
-LB20.1b:      CB CM* ÷;
+#                                 ZWJ acts independently to the right, no break after by LB8a.
+LB20.1:      . CM* ZWJ CB;
+LB20.2:      . CM* ÷ CB;
+
+LB20.3:      CB CM* ZWJ [^CM];
+LB20.4:      CB CM* ÷;
 
 # Note: Rule 21a must come before 21 to prevent 21.1 from matching HL BA, then
 #       not picking up the continuing match after the BA from 21a.
@@ -214,15 +216,15 @@ LB29:        IS CM* (AL | HL);
 LB30.1:      (AL | CM | HL | NU) CM* OP;
 LB30.2:      CP CM* (AL | HL | NU);
 
-# LB31  keep pairs of RI together.
-LB30a.1:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS];
-LB30a.2:     RI CM* RI CM* ZWJ (ID | Extended_Pict | EmojiNRK);
-LB30a.3:     RI CM* RI CM* ÷;
+# LB30a  keep pairs of RI together.
+LB30a.1:     RI CM* RI              ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
+LB30a.2:     RI CM* RI CM* [CM-ZWJ] ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
+LB30a.3:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS ZWJ]?;
 
 # LB30b Do not break between Emoji Base and Emoji Modifier
 LB30b:       EB CM* EM;
 
 # LB31 Break Everywhere Else.
 #      Include combining marks
-LB31.1:        . CM* ZWJ (ID | Extended_Pict | EmojiNRK);
+LB31.1:        . CM* ZWJ [^CM];
 LB31.2:        . CM* ÷;
diff --git a/icu4c/source/test/testdata/break_rules/line_normal.txt b/icu4c/source/test/testdata/break_rules/line_normal.txt
index 3fa5e235ba0..7cf2613a855 100644
--- a/icu4c/source/test/testdata/break_rules/line_normal.txt
+++ b/icu4c/source/test/testdata/break_rules/line_normal.txt
@@ -5,7 +5,8 @@
 #
 # file: line_normal.txt
 #
-# Reference Line Break rules for intltest rbbi/RBBIMonkeyTest
+# Reference Line Break rules for intltest rbbi/RBBIMonkeyTest.
+# Rules derived from Unicode Standard Annex #14 Revision 40 for Unicode 11.0
 #
 # Note: Rule syntax and the monkey test itself are still a work in progress.
 #       They are expected to change with review and the addition of support for rule tailoring.
@@ -73,9 +74,6 @@ XX = [:LineBreak =  Unknown:];
 ZW = [:LineBreak =  ZWSpace:];
 ZWJ = [:LineBreak =  ZWJ:];
 
-EmojiNRK = [[\p{Emoji}] - [[RI]\u002a\u00230-9©®™〰〽]];
-Extended_Pict = [:ExtPict:];
-
 # LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
 AL = [AL AI SG XX ];
 dictionary = SA;
@@ -111,8 +109,10 @@ LB7.2:      [ZW SP] [SP ZW];
 LB8:        ZW ÷;
 
 # LB8a
-#      ZWJ x (ID | Extended_Pict | EmojiNRK)
-LB8a:       ZWJ (ID | Extended_Pict | EmojiNRK);
+#      ZWJ x
+#      Don't match a CM on the right - let other rules pick up CM sequences, where
+#      the ZWJ behaves as just another generic CM.
+LB8a:       ZWJ [^CM];
 
 
 # LB9:  X CM -> X
@@ -121,7 +121,7 @@ LB8a:       ZWJ (ID | Extended_Pict | EmojiNRK);
 #LB11:       × WJ;
 #            WJ ×
 
-LB11.1:      [^BK CR LF NL SP ZW] CM* WJ;
+LB11.1:      [^SP] CM* WJ;
 LB11.2:      SP WJ;
 LB11.3:      WJ CM* [^CM];
 
@@ -147,12 +147,14 @@ LB19:        . CM* QU;
 LB19.1:      QU CM* [^CM];
 
 # LB 20   Break before and after CB.
-#         Interaction with LB8a:  ZWJ x ID is tricky because CM includes ZWJ.
+#         Interaction with LB8a:  ZWJ x . is tricky because CM includes ZWJ.
 #                                 ZWJ acts like a CM to the left, combining with CB.
-#                                 ZWJ acts independently to the right, no break from ID by LB8a.
-LB20:        . CM* ÷ CB;
-LB20.1a:     CB CM* ZWJ (ID | Extended_Pict | EmojiNRK);
-LB20.1b:      CB CM* ÷;
+#                                 ZWJ acts independently to the right: no break after the ZWJ, by LB8a.
+LB20.1:      . CM* ZWJ CB;
+LB20.2:      . CM* ÷ CB;
+
+LB20.3:      CB CM* ZWJ [^CM];
+LB20.4:      CB CM* ÷;
 
 # Note: Rule 21a must come before 21 to prevent 21.1 from matching HL BA, then
 #       not picking up the continuing match after the BA from 21a.
@@ -199,15 +201,15 @@ LB29:        IS CM* (AL | HL);
 LB30.1:      (AL | CM | HL | NU) CM* OP;
 LB30.2:      CP CM* (AL | HL | NU);
 
-# LB31  keep pairs of RI together.
-LB30a.1:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS];
-LB30a.2:     RI CM* RI CM* ZWJ (ID | Extended_Pict | EmojiNRK);
-LB30a.3:     RI CM* RI CM* ÷;
+# LB30a  keep pairs of RI together.
+LB30a.1:     RI CM* RI              ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
+LB30a.2:     RI CM* RI CM* [CM-ZWJ] ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
+LB30a.3:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS ZWJ]?;
 
 # LB30b Do not break between Emoji Base and Emoji Modifier
 LB30b:       EB CM* EM;
 
 # LB31 Break Everywhere Else.
 #      Include combining marks
-LB31.1:        . CM* ZWJ (ID | Extended_Pict | EmojiNRK);
+LB31.1:        . CM* ZWJ [^CM];
 LB31.2:        . CM* ÷;
diff --git a/icu4c/source/test/testdata/break_rules/line_normal_cj.txt b/icu4c/source/test/testdata/break_rules/line_normal_cj.txt
index 5a47d8ea7c1..3af4a582db1 100644
--- a/icu4c/source/test/testdata/break_rules/line_normal_cj.txt
+++ b/icu4c/source/test/testdata/break_rules/line_normal_cj.txt
@@ -5,7 +5,8 @@
 #
 #  file:  line_normal_cj.txt
 #
-# Reference Line Break rules for intltest rbbi/RBBIMonkeyTest
+# Reference Line Break rules for intltest rbbi/RBBIMonkeyTest.
+# Rules derived from Unicode Standard Annex #14 Revision 40 for Unicode 11.0
 #
 # Note: Rule syntax and the monkey test itself are still a work in progress.
 #       They are expected to change with review and the addition of support for rule tailoring.
@@ -75,9 +76,6 @@ XX = [:LineBreak =  Unknown:];
 ZW = [:LineBreak =  ZWSpace:];
 ZWJ = [:LineBreak =  ZWJ:];
 
-EmojiNRK = [[\p{Emoji}] - [[RI]\u002a\u00230-9©®™〰〽]];
-Extended_Pict = [:ExtPict:];
-
 # LB1 - Resolve AI, CB, CJ, SA, SG, and XX into other line breaking classes
 AL = [AL AI SG XX ];
 dictionary = SA;
@@ -116,8 +114,10 @@ LB7.2:      [ZW SP] [SP ZW];
 LB8:        ZW ÷;
 
 # LB8a
-#      ZWJ x (ID | Extended_Pict | EmojiNRK)
-LB8a:       ZWJ (ID | Extended_Pict | EmojiNRK);
+#      ZWJ x
+#      Don't match a CM on the right - let other rules pick up CM sequences, where
+#      the ZWJ behaves as just another generic CM.
+LB8a:       ZWJ [^CM];
 
 
 # LB9:  X CM -> X
@@ -126,7 +126,7 @@ LB8a:       ZWJ (ID | Extended_Pict | EmojiNRK);
 #LB11:       × WJ;
 #            WJ ×
 
-LB11.1:      [^BK CR LF NL SP ZW] CM* WJ;
+LB11.1:      [^SP] CM* WJ;
 LB11.2:      SP WJ;
 LB11.3:      WJ CM* [^CM];
 
@@ -152,12 +152,14 @@ LB19:        . CM* QU;
 LB19.1:      QU CM* [^CM];
 
 # LB 20   Break before and after CB.
-#         Interaction with LB8a:  ZWJ x ID is tricky because CM includes ZWJ.
+#         Interaction with LB8a:  ZWJ x . is tricky because CM includes ZWJ.
 #                                 ZWJ acts like a CM to the left, combining with CB.
-#                                 ZWJ acts independently to the right, no break from ID by LB8a.
-LB20:        . CM* ÷ CB;
-LB20.1a:     CB CM* ZWJ (ID | Extended_Pict | EmojiNRK);
-LB20.1b:      CB CM* ÷;
+#                                 ZWJ acts independently to the right: no break after the ZWJ, by LB8a.
+LB20.1:      . CM* ZWJ CB;
+LB20.2:      . CM* ÷ CB;
+
+LB20.3:      CB CM* ZWJ [^CM];
+LB20.4:      CB CM* ÷;
 
 # Note: Rule 21a must come before 21 to prevent 21.1 from matching HL BA, then
 #       not picking up the continuing match after the BA from 21a.
@@ -208,15 +210,15 @@ LB29:        IS CM* (AL | HL);
 LB30.1:      (AL | CM | HL | NU) CM* OP;
 LB30.2:      CP CM* (AL | HL | NU);
 
-# LB31  keep pairs of RI together.
-LB30a.1:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS];
-LB30a.2:     RI CM* RI CM* ZWJ (ID | Extended_Pict | EmojiNRK);
-LB30a.3:     RI CM* RI CM* ÷;
+# LB30a  keep pairs of RI together.
+LB30a.1:     RI CM* RI              ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
+LB30a.2:     RI CM* RI CM* [CM-ZWJ] ÷ [^BK CR LF NL SP ZW WJ CL CP EX IS SY GL QU BA HY NS CM];
+LB30a.3:     RI CM* RI CM* [BK CR LF NL SP ZW WJ GL CL CP EX IS SY QU BA HY NS ZWJ]?;
 
 # LB30b Do not break between Emoji Base and Emoji Modifier
 LB30b:       EB CM* EM;
 
 # LB31 Break Everywhere Else.
 #      Include combining marks
-LB31.1:        . CM* ZWJ (ID | Extended_Pict | EmojiNRK);
+LB31.1:        . CM* ZWJ [^CM];
 LB31.2:        . CM* ÷;
-- 
2.40.0