From b58e06a604a6d279809716ec192630d8cbca3aa1 Mon Sep 17 00:00:00 2001 From: Andy Heninger Date: Fri, 6 Apr 2018 17:31:18 +0000 Subject: [PATCH] ICU-13630 WSegSpace, use word property value in rules definitions. X-SVN-Rev: 41205 --- icu4c/source/data/brkitr/rules/word.txt | 2 +- icu4c/source/data/brkitr/rules/word_POSIX.txt | 2 +- icu4c/source/test/intltest/rbbitst.cpp | 2 +- icu4c/source/test/testdata/break_rules/word.txt | 2 +- icu4c/source/test/testdata/break_rules/word_POSIX.txt | 2 +- .../core/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java | 2 +- .../core/src/com/ibm/icu/dev/test/rbbi/break_rules/word.txt | 2 +- .../src/com/ibm/icu/dev/test/rbbi/break_rules/word_POSIX.txt | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/icu4c/source/data/brkitr/rules/word.txt b/icu4c/source/data/brkitr/rules/word.txt index 1bbbbca5737..8a4ef977519 100644 --- a/icu4c/source/data/brkitr/rules/word.txt +++ b/icu4c/source/data/brkitr/rules/word.txt @@ -46,7 +46,7 @@ $MidLetter = [\p{Word_Break = MidLetter}]; $MidNum = [\p{Word_Break = MidNum}]; $Numeric = [\p{Word_Break = Numeric}]; $ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; -$WSegSpace = [[\p{Zs}]-[\p{Line_Break = Glue}]]; +$WSegSpace = [\p{Word_Break = WSegSpace}]; $Extended_Pict = [:ExtPict:]; $Han = [:Han:]; diff --git a/icu4c/source/data/brkitr/rules/word_POSIX.txt b/icu4c/source/data/brkitr/rules/word_POSIX.txt index ee712789242..4cc8cad2c2b 100644 --- a/icu4c/source/data/brkitr/rules/word_POSIX.txt +++ b/icu4c/source/data/brkitr/rules/word_POSIX.txt @@ -46,7 +46,7 @@ $MidLetter = [\p{Word_Break = MidLetter} - [\:]]; $MidNum = [\p{Word_Break = MidNum} [.]]; $Numeric = [\p{Word_Break = Numeric}]; $ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; -$WSegSpace = [[\p{Zs}]-[\p{Line_Break = Glue}]]; +$WSegSpace = [\p{Word_Break = WSegSpace}]; $Extended_Pict = [:ExtPict:]; $Han = [:Han:]; diff --git a/icu4c/source/test/intltest/rbbitst.cpp b/icu4c/source/test/intltest/rbbitst.cpp index 864a5a4fce7..6c8cff8602c 100644 --- a/icu4c/source/test/intltest/rbbitst.cpp +++ b/icu4c/source/test/intltest/rbbitst.cpp @@ -1868,7 +1868,7 @@ RBBIWordMonkey::RBBIWordMonkey() fFormatSet = new UnicodeSet(u"[\\p{Word_Break = Format}]", status); fExtendNumLetSet = new UnicodeSet(u"[\\p{Word_Break = ExtendNumLet}]", status); fExtendSet = new UnicodeSet(u"[\\p{Word_Break = Extend}]", status); - fWSegSpaceSet = new UnicodeSet(u"[[\\p{Zs}]-[\\p{Line_Break = GL}]]", status); + fWSegSpaceSet = new UnicodeSet(u"[\\p{Word_Break = WSegSpace}]", status); fZWJSet = new UnicodeSet(u"[\\p{Word_Break = ZWJ}]", status); fExtendedPictSet = new UnicodeSet(u"[:Extended_Pictographic:]", status); diff --git a/icu4c/source/test/testdata/break_rules/word.txt b/icu4c/source/test/testdata/break_rules/word.txt index 541367404a3..fc7bc9b1886 100644 --- a/icu4c/source/test/testdata/break_rules/word.txt +++ b/icu4c/source/test/testdata/break_rules/word.txt @@ -32,7 +32,7 @@ MidLetter = [\p{Word_Break = MidLetter}]; MidNum = [\p{Word_Break = MidNum}]; Numeric = [\p{Word_Break = Numeric}]; ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; -WSegSpace = [[\p{Zs}]-[\p{Line_Break=Glue}]]; +WSegSpace = [\p{Word_Break = WSegSpace}]; Extended_Pict = [:ExtPict:]; #define dictionary, with the effect being that those characters don't appear in test data. diff --git a/icu4c/source/test/testdata/break_rules/word_POSIX.txt b/icu4c/source/test/testdata/break_rules/word_POSIX.txt index d43f6c56a6b..10efc32d210 100644 --- a/icu4c/source/test/testdata/break_rules/word_POSIX.txt +++ b/icu4c/source/test/testdata/break_rules/word_POSIX.txt @@ -31,7 +31,7 @@ MidLetter = [\p{Word_Break = MidLetter} - [\:]]; MidNum = [\p{Word_Break = MidNum} [.]]; Numeric = [\p{Word_Break = Numeric}]; ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; -WSegSpace = [[\p{Zs}]-[\p{Line_Break=Glue}]]; +WSegSpace = [\p{Word_Break = WSegSpace}]; Extended_Pict = [:ExtPict:]; #define dictionary, with the effect being that those characters don't appear in test data. diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java index 41788ef54ba..c986f04df9e 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java @@ -334,7 +334,7 @@ public class RBBITestMonkey extends TestFmwk { fFormatSet = new UnicodeSet("[\\p{Word_Break = Format}]"); fExtendNumLetSet = new UnicodeSet("[\\p{Word_Break = ExtendNumLet}]"); fExtendSet = new UnicodeSet("[\\p{Word_Break = Extend}]"); - fWSegSpaceSet = new UnicodeSet("[[\\p{Zs}]-[\\p{Line_Break = GL}]]"); + fWSegSpaceSet = new UnicodeSet("[\\p{Word_Break = WSegSpace}]"); fZWJSet = new UnicodeSet("[\\p{Word_Break = ZWJ}]"); fExtendedPictSet = new UnicodeSet("[:Extended_Pictographic:]"); diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/word.txt b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/word.txt index 541367404a3..fc7bc9b1886 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/word.txt +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/word.txt @@ -32,7 +32,7 @@ MidLetter = [\p{Word_Break = MidLetter}]; MidNum = [\p{Word_Break = MidNum}]; Numeric = [\p{Word_Break = Numeric}]; ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; -WSegSpace = [[\p{Zs}]-[\p{Line_Break=Glue}]]; +WSegSpace = [\p{Word_Break = WSegSpace}]; Extended_Pict = [:ExtPict:]; #define dictionary, with the effect being that those characters don't appear in test data. diff --git a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/word_POSIX.txt b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/word_POSIX.txt index d43f6c56a6b..10efc32d210 100644 --- a/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/word_POSIX.txt +++ b/icu4j/main/tests/core/src/com/ibm/icu/dev/test/rbbi/break_rules/word_POSIX.txt @@ -31,7 +31,7 @@ MidLetter = [\p{Word_Break = MidLetter} - [\:]]; MidNum = [\p{Word_Break = MidNum} [.]]; Numeric = [\p{Word_Break = Numeric}]; ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; -WSegSpace = [[\p{Zs}]-[\p{Line_Break=Glue}]]; +WSegSpace = [\p{Word_Break = WSegSpace}]; Extended_Pict = [:ExtPict:]; #define dictionary, with the effect being that those characters don't appear in test data. -- 2.40.0