From: Andy Heninger Date: Wed, 4 Apr 2018 23:37:43 +0000 (+0000) Subject: ICU-13630 Update RBBI for Unicode 11 data. X-Git-Tag: release-62-rc~206^2~4 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4e097dc49fcad08b57f15e376b54a6c36c7271dd;p=icu ICU-13630 Update RBBI for Unicode 11 data. X-SVN-Rev: 41194 --- diff --git a/icu4c/source/data/brkitr/rules/char.txt b/icu4c/source/data/brkitr/rules/char.txt index 3a26f8289ae..6495d81b629 100644 --- a/icu4c/source/data/brkitr/rules/char.txt +++ b/icu4c/source/data/brkitr/rules/char.txt @@ -20,6 +20,9 @@ $CR = [\p{Grapheme_Cluster_Break = CR}]; $LF = [\p{Grapheme_Cluster_Break = LF}]; $Control = [[\p{Grapheme_Cluster_Break = Control}]]; +# TODO: Enable Virama & LinkingConsonant definitions once rule builder allows empty sets. +#$Virama = [[\p{Grapheme_Cluster_Break = Virama}]]; +#$LinkingConsonant = [[\p{Grapheme_Cluster_Break = LinkingConsonant}]]; $Extend = [[\p{Grapheme_Cluster_Break = Extend}]]; $ZWJ = [\p{Grapheme_Cluster_Break = ZWJ}]; $Regional_Indicator = [\p{Grapheme_Cluster_Break = Regional_Indicator}]; @@ -39,7 +42,6 @@ $LVT = [\p{Grapheme_Cluster_Break = LVT}]; # Emoji defintions $Extended_Pict = [:ExtPict:]; -$EmojiNRK = [[\p{Emoji}] - [\p{Grapheme_Cluster_Break = Regional_Indicator}*\u00230-9©®™〰〽]]; ## ------------------------------------------------- !!chain; @@ -62,7 +64,7 @@ $L ($L | $V | $LV | $LVT); $Prepend [^$Control $CR $LF]; # GB 11 Do not break within emoji modifier sequences or emoji zwj sequences. -($Extended_Pict | $EmojiNRK) $Extend* $ZWJ ($Extended_Pict | $EmojiNRK); +$Extended_Pict $Extend* $ZWJ $Extended_Pict; # GB 12-13. Keep pairs of regional indicators together # Note that hard break '/' rule triggers only if there are three or more initial RIs, @@ -77,4 +79,4 @@ $Prepend [^$Control $CR $LF]; !!safe_reverse; $Regional_Indicator $Regional_Indicator; -($Extend | $ZWJ | $EmojiNRK | $Extended_Pict)+ .; +($Extend | $ZWJ | $Extended_Pict)+ .; diff --git a/icu4c/source/data/brkitr/rules/word.txt b/icu4c/source/data/brkitr/rules/word.txt index add6c7684d4..1bbbbca5737 100644 --- a/icu4c/source/data/brkitr/rules/word.txt +++ b/icu4c/source/data/brkitr/rules/word.txt @@ -46,8 +46,8 @@ $MidLetter = [\p{Word_Break = MidLetter}]; $MidNum = [\p{Word_Break = MidNum}]; $Numeric = [\p{Word_Break = Numeric}]; $ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; +$WSegSpace = [[\p{Zs}]-[\p{Line_Break = Glue}]]; $Extended_Pict = [:ExtPict:]; -$EmojiNRK = [[\p{Emoji}] - [\p{Word_Break = Regional_Indicator}\u002a\u00230-9©®™〰〽]]; $Han = [:Han:]; $Hiragana = [:Hiragana:]; @@ -101,8 +101,11 @@ $CR $LF; # Rule 3c ZWJ x (Extended_Pict | EmojiNRK). Precedes WB4, so no intervening Extend chars allowed. # -$ZWJ ($Extended_Pict | $EmojiNRK); +$ZWJ $Extended_Pict; +# Rule 3d - Keep horizontal whitespace together. +# +$WSegSpace $WSegSpace; # Rule 4 - ignore Format and Extend characters, except when they appear at the beginning # of a region of Text. The rule here comes into play when the start of text diff --git a/icu4c/source/data/brkitr/rules/word_POSIX.txt b/icu4c/source/data/brkitr/rules/word_POSIX.txt index 0a0442be342..ee712789242 100644 --- a/icu4c/source/data/brkitr/rules/word_POSIX.txt +++ b/icu4c/source/data/brkitr/rules/word_POSIX.txt @@ -46,8 +46,8 @@ $MidLetter = [\p{Word_Break = MidLetter} - [\:]]; $MidNum = [\p{Word_Break = MidNum} [.]]; $Numeric = [\p{Word_Break = Numeric}]; $ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; +$WSegSpace = [[\p{Zs}]-[\p{Line_Break = Glue}]]; $Extended_Pict = [:ExtPict:]; -$EmojiNRK = [[\p{Emoji}] - [\p{Word_Break = Regional_Indicator}\u002a\u00230-9©®™〰〽]]; $Han = [:Han:]; $Hiragana = [:Hiragana:]; @@ -101,8 +101,11 @@ $CR $LF; # Rule 3c ZWJ x (Extended_Pict | EmojiNRK). Precedes WB4, so no intervening Extend chars allowed. # -$ZWJ ($Extended_Pict | $EmojiNRK); +$ZWJ $Extended_Pict; +# Rule 3d - Keep horizontal whitespace together. +# +$WSegSpace $WSegSpace; # Rule 4 - ignore Format and Extend characters, except when they appear at the beginning # of a region of Text. The rule here comes into play when the start of text diff --git a/icu4c/source/test/intltest/rbbitst.cpp b/icu4c/source/test/intltest/rbbitst.cpp index b1bd621a898..8e80a09ccae 100644 --- a/icu4c/source/test/intltest/rbbitst.cpp +++ b/icu4c/source/test/intltest/rbbitst.cpp @@ -1585,11 +1585,7 @@ private: UnicodeSet *fLVSet; UnicodeSet *fLVTSet; UnicodeSet *fHangulSet; - UnicodeSet *fEmojiBaseSet; - UnicodeSet *fEmojiModifierSet; UnicodeSet *fExtendedPictSet; - UnicodeSet *fEBGSet; - UnicodeSet *fEmojiNRKSet; UnicodeSet *fAnySet; const UnicodeString *fText; @@ -1621,12 +1617,7 @@ RBBICharMonkey::RBBICharMonkey() { fHangulSet->addAll(*fLVSet); fHangulSet->addAll(*fLVTSet); - fEmojiBaseSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = EB}]"), status); - fEmojiModifierSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = EM}]"), status); fExtendedPictSet = new UnicodeSet(u"[:Extended_Pictographic:]", status); - fEBGSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = EBG}]"), status); - fEmojiNRKSet = new UnicodeSet(UNICODE_STRING_SIMPLE( - "[[\\p{Emoji}]-[\\p{Grapheme_Cluster_Break = Regional_Indicator}*#0-9\\u00a9\\u00ae\\u2122\\u3030\\u303d]]"), status); fAnySet = new UnicodeSet(0, 0x10ffff); fSets = new UVector(status); @@ -1640,12 +1631,8 @@ RBBICharMonkey::RBBICharMonkey() { fSets->addElement(fSpacingSet, status); fSets->addElement(fHangulSet, status); fSets->addElement(fAnySet, status); - fSets->addElement(fEmojiBaseSet, status); - fSets->addElement(fEmojiModifierSet, status); fSets->addElement(fZWJSet, status); fSets->addElement(fExtendedPictSet, status); - fSets->addElement(fEBGSet, status); - fSets->addElement(fEmojiNRKSet,status); if (U_FAILURE(status)) { deferredStatus = status; } @@ -1765,22 +1752,8 @@ int32_t RBBICharMonkey::next(int32_t prevPos) { continue; } - // Rule (GB10) (Emoji_Base | EBG) Extend * x Emoji_Modifier - if ((fEmojiBaseSet->contains(c1) || fEBGSet->contains(c1)) && fEmojiModifierSet->contains(c2)) { - continue; - } - if ((fEmojiBaseSet->contains(cBase) || fEBGSet->contains(cBase)) && - fExtendSet->contains(c1) && fEmojiModifierSet->contains(c2)) { - continue; - } - - // Rule (GB11) (Glue_After_ZWJ | Emoji) Extend * ZWJ x (Glue_After_ZWJ | Emoji) - if ((fExtendedPictSet->contains(c0) || fEmojiNRKSet->contains(c0)) && fZWJSet->contains(c1) && - (fExtendedPictSet->contains(c2) || fEmojiNRKSet->contains(c2))) { - continue; - } - if ((fExtendedPictSet->contains(cBase) || fEmojiNRKSet->contains(cBase)) && fExtendSet->contains(c0) && fZWJSet->contains(c1) && - (fExtendedPictSet->contains(c2) || fEmojiNRKSet->contains(c2))) { + // Rule (GB11) Extended_Pictographic Extend * ZWJ x Extended_Pictographic + if (fExtendedPictSet->contains(cBase) && fZWJSet->contains(c1) && fExtendedPictSet->contains(c2)) { continue; } @@ -1827,12 +1800,8 @@ RBBICharMonkey::~RBBICharMonkey() { delete fLVTSet; delete fHangulSet; delete fAnySet; - delete fEmojiBaseSet; - delete fEmojiModifierSet; delete fZWJSet; delete fExtendedPictSet; - delete fEBGSet; - delete fEmojiNRKSet; } //------------------------------------------------------------------------------------------ @@ -1868,13 +1837,10 @@ private: UnicodeSet *fOtherSet; UnicodeSet *fExtendSet; UnicodeSet *fExtendNumLetSet; + UnicodeSet *fWSegSpaceSet; UnicodeSet *fDictionarySet; - UnicodeSet *fEBaseSet; - UnicodeSet *fEBGSet; - UnicodeSet *fEModifierSet; UnicodeSet *fZWJSet; UnicodeSet *fExtendedPictSet; - UnicodeSet *fEmojiNRKSet; const UnicodeString *fText; }; @@ -1902,14 +1868,10 @@ RBBIWordMonkey::RBBIWordMonkey() fFormatSet = new UnicodeSet(u"[\\p{Word_Break = Format}]", status); fExtendNumLetSet = new UnicodeSet(u"[\\p{Word_Break = ExtendNumLet}]", status); fExtendSet = new UnicodeSet(u"[\\p{Word_Break = Extend}]", status); + fWSegSpaceSet = new UnicodeSet(u"[[\\p{Zs}]-[\\p{Line_Break = GL}]]", status); - fEBaseSet = new UnicodeSet(u"[\\p{Word_Break = EB}]", status); - fEBGSet = new UnicodeSet(u"[\\p{Word_Break = EBG}]", status); - fEModifierSet = new UnicodeSet(u"[\\p{Word_Break = EM}]", status); fZWJSet = new UnicodeSet(u"[\\p{Word_Break = ZWJ}]", status); fExtendedPictSet = new UnicodeSet(u"[:Extended_Pictographic:]", status); - fEmojiNRKSet = new UnicodeSet( - u"[[\\p{Emoji}]-[\\p{Word_Break = Regional_Indicator}*#0-9\\u00a9\\u00ae\\u2122\\u3030\\u303d]]", status); fDictionarySet = new UnicodeSet(u"[[\\uac00-\\ud7a3][:Han:][:Hiragana:]]", status); fDictionarySet->addAll(*fKatakanaSet); @@ -1937,15 +1899,12 @@ RBBIWordMonkey::RBBIWordMonkey() fOtherSet->removeAll(*fMidNumSet); fOtherSet->removeAll(*fNumericSet); fOtherSet->removeAll(*fExtendNumLetSet); + fOtherSet->removeAll(*fWSegSpaceSet); fOtherSet->removeAll(*fFormatSet); fOtherSet->removeAll(*fExtendSet); fOtherSet->removeAll(*fRegionalIndicatorSet); - fOtherSet->removeAll(*fEBaseSet); - fOtherSet->removeAll(*fEBGSet); - fOtherSet->removeAll(*fEModifierSet); fOtherSet->removeAll(*fZWJSet); fOtherSet->removeAll(*fExtendedPictSet); - fOtherSet->removeAll(*fEmojiNRKSet); // Inhibit dictionary characters from being tested at all. fOtherSet->removeAll(*fDictionarySet); @@ -1969,13 +1928,10 @@ RBBIWordMonkey::RBBIWordMonkey() fSets->addElement(fExtendSet, status); fSets->addElement(fOtherSet, status); fSets->addElement(fExtendNumLetSet, status); + fSets->addElement(fWSegSpaceSet, status); - fSets->addElement(fEBaseSet, status); - fSets->addElement(fEBGSet, status); - fSets->addElement(fEModifierSet, status); fSets->addElement(fZWJSet, status); fSets->addElement(fExtendedPictSet, status); - fSets->addElement(fEmojiNRKSet, status); if (U_FAILURE(status)) { deferredStatus = status; @@ -2059,7 +2015,12 @@ int32_t RBBIWordMonkey::next(int32_t prevPos) { // get the potential ZWJ, the character immediately preceding c2. // Sloppy UChar32 indexing: p2-1 may reference trail half // but char32At will get the full code point. - if (fZWJSet->contains(fText->char32At(p2-1)) && (fExtendedPictSet->contains(c2) || fEmojiNRKSet->contains(c2))) { + if (fZWJSet->contains(fText->char32At(p2-1)) && fExtendedPictSet->contains(c2)) { + continue; + } + + // Rule (3d) Keep horizontal whitespace together. + if (fWSegSpaceSet->contains(fText->char32At(p2-1)) && fWSegSpaceSet->contains(c2)) { continue; } @@ -2192,15 +2153,12 @@ RBBIWordMonkey::~RBBIWordMonkey() { delete fFormatSet; delete fExtendSet; delete fExtendNumLetSet; + delete fWSegSpaceSet; delete fRegionalIndicatorSet; delete fDictionarySet; delete fOtherSet; - delete fEBaseSet; - delete fEBGSet; - delete fEModifierSet; delete fZWJSet; delete fExtendedPictSet; - delete fEmojiNRKSet; } diff --git a/icu4c/source/test/testdata/break_rules/grapheme.txt b/icu4c/source/test/testdata/break_rules/grapheme.txt index 1d754cf2784..27498c1ff13 100644 --- a/icu4c/source/test/testdata/break_rules/grapheme.txt +++ b/icu4c/source/test/testdata/break_rules/grapheme.txt @@ -35,7 +35,6 @@ LVT = [\p{Grapheme_Cluster_Break = LVT}]; # Emoji defintions -EmojiNRK = [[\p{Emoji}] - [Regional_Indicator\u002a\u00230-9©®™〰〽]]; Extended_Pict = [:ExtPict:]; GB3: CR LF; @@ -46,7 +45,7 @@ GB6: L (L | V | LV | LVT); GB7: (LV | V) (V | T); GB8: (LVT | T) T; -GB11: (Extended_Pict | EmojiNRK) Extend* ZWJ (Extended_Pict | EmojiNRK); +GB11: Extended_Pict Extend* ZWJ Extended_Pict; GB9: . (Extend | ZWJ); GB9a: . SpacingMark; diff --git a/icu4c/source/test/testdata/break_rules/word.txt b/icu4c/source/test/testdata/break_rules/word.txt index 4f347729c2a..541367404a3 100644 --- a/icu4c/source/test/testdata/break_rules/word.txt +++ b/icu4c/source/test/testdata/break_rules/word.txt @@ -32,7 +32,7 @@ MidLetter = [\p{Word_Break = MidLetter}]; MidNum = [\p{Word_Break = MidNum}]; Numeric = [\p{Word_Break = Numeric}]; ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; -EmojiNRK = [[\p{Emoji}] - [[Regional_Indicator]\u002a\u00230-9©®™〰〽]]; +WSegSpace = [[\p{Zs}]-[\p{Line_Break=Glue}]]; Extended_Pict = [:ExtPict:]; #define dictionary, with the effect being that those characters don't appear in test data. @@ -59,7 +59,8 @@ WB3: CR LF; WB3a: (Newline | CR | LF) ÷; WB3b: . ÷ (Newline | CR | LF); # actually redundant? No other rule combines. # (but needed with UAX treat-as scheme.) -WB3c: ZWJ (Extended_Pict | EmojiNRK); +WB3c: ZWJ Extended_Pict; +WB3d: WSegSpace WSegSpace; WB5: AHLetter ExtFmt* AHLetter; @@ -81,11 +82,11 @@ WB13b: ExtendNumLet ExtFmt* (AHLetter | Numeric | Katakana); # WB rule 15 - 17, pairs of Regional Indicators stay unbroken. # Interacts with WB3c. -WB15: Regional_Indicator ExtFmt* Regional_Indicator ExtFmt* ZWJ (Extended_Pict | EmojiNRK); +WB15: Regional_Indicator ExtFmt* Regional_Indicator ExtFmt* ZWJ Extended_Pict; WB17: Regional_Indicator ExtFmt* Regional_Indicator ExtFmt* ÷; # Rule WB 999 Any ÷ Any # Interacts with WB3c, do not break between ZWJ and (Extended_Pict | EBG). -WB999.1: . ExtFmt* ZWJ (Extended_Pict | EmojiNRK); +WB999.1: . ExtFmt* ZWJ Extended_Pict; WB999.2: . ExtFmt* ÷; diff --git a/icu4c/source/test/testdata/break_rules/word_POSIX.txt b/icu4c/source/test/testdata/break_rules/word_POSIX.txt index 7ae55380da9..d43f6c56a6b 100644 --- a/icu4c/source/test/testdata/break_rules/word_POSIX.txt +++ b/icu4c/source/test/testdata/break_rules/word_POSIX.txt @@ -31,7 +31,7 @@ MidLetter = [\p{Word_Break = MidLetter} - [\:]]; MidNum = [\p{Word_Break = MidNum} [.]]; Numeric = [\p{Word_Break = Numeric}]; ExtendNumLet = [\p{Word_Break = ExtendNumLet}]; -EmojiNRK = [[\p{Emoji}] - [[Regional_Indicator]\u002a\u00230-9©®™〰〽]]; +WSegSpace = [[\p{Zs}]-[\p{Line_Break=Glue}]]; Extended_Pict = [:ExtPict:]; #define dictionary, with the effect being that those characters don't appear in test data. @@ -58,7 +58,8 @@ WB3: CR LF; WB3a: (Newline | CR | LF) ÷; WB3b: . ÷ (Newline | CR | LF); # actually redundant? No other rule combines. # (but needed with UAX treat-as scheme.) -WB3c: ZWJ (Extended_Pict | EmojiNRK); +WB3c: ZWJ Extended_Pict; +WB3d: WSegSpace WSegSpace; WB5: AHLetter ExtFmt* AHLetter; @@ -80,11 +81,11 @@ WB13b: ExtendNumLet ExtFmt* (AHLetter | Numeric | Katakana); # WB rule 15 - 17, pairs of Regional Indicators stay unbroken. # Interacts with WB3c. -WB15: Regional_Indicator ExtFmt* Regional_Indicator ExtFmt* ZWJ (Extended_Pict | EmojiNRK); +WB15: Regional_Indicator ExtFmt* Regional_Indicator ExtFmt* ZWJ Extended_Pict; WB17: Regional_Indicator ExtFmt* Regional_Indicator ExtFmt* ÷; # Rule WB 999 Any ÷ Any # Interacts with WB3c, do not break between ZWJ and (Extended_Pict | EBG). -WB999.1: . ExtFmt* ZWJ (Extended_Pict | EmojiNRK); +WB999.1: . ExtFmt* ZWJ Extended_Pict; WB999.2: . ExtFmt* ÷; diff --git a/icu4c/source/test/testdata/rbbitst.txt b/icu4c/source/test/testdata/rbbitst.txt index 761b3e01b5b..4a82435b3ed 100644 --- a/icu4c/source/test/testdata/rbbitst.txt +++ b/icu4c/source/test/testdata/rbbitst.txt @@ -184,7 +184,7 @@ # -•This<200> •is<200> •a<200> •word<200> •break<200>.• • •Isn't<200> •it<200>?• •2.25<100> +•This<200> •is<200> •a<200> •word<200> •break<200>.• •Isn't<200> •it<200>?• •2.25<100> @@ -195,7 +195,7 @@ • •\u0939\u093f\u0928\u094d\u200d\u0926\u0940<200> •\u0939\u0948<200> •\u0905\u093e\u092a<200> •\u0938\u093f\u0916\u094b\u0917\u0947<200>?• #Hindi Numbers -• •\u0968\u0966.\u0969\u096f<100> •\u0967\u0966\u0966.\u0966\u0966<100> •\N{RUPEE SIGN}•\u0967,\u0967\u0966\u0966.\u0966\u0966<100> • •\u0905\u092e\u091c<200>\n• +• •\u0968\u0966.\u0969\u096f<100> •\u0967\u0966\u0966.\u0966\u0966<100> •\N{RUPEE SIGN}•\u0967,\u0967\u0966\u0966.\u0966\u0966<100> •\u0905\u092e\u091c<200>\n• •\u0938\u094d\u200d\u0935\u0924\u0902deadTA\u0930<200>\r•It's<200> •$•30.10<100> •12,34<100>¢•£•¤•¥•alpha\u05f3beta\u05f4gamma<200> • @@ -1143,7 +1143,7 @@ Bangkok)• # woman zwj woman zwj girl zwj girl, woman/fitz-1-2 zwj woman/fitz-4 zwj boy/fitz-6 •\U0001F469\u200D\U0001F469\u200D\U0001F467\u200D\U0001F467•\U0001F469\U0001F3FB\u200D\U0001F469\U0001F3FD\u200D\U0001F466\U0001F3FF• # woman zwj baby/fitz-3, older_woman/fitz-5, runner/fitz-4, raised_fist/fitz-3, fuel_pump, fitz-3 -•\U0001F469\u200D\U0001F476\U0001F3FC•\U0001F475\U0001F3FE•\U0001F3C3\U0001F3FD•\u270A\U0001F3FC•\u26FD•\U0001F3FC• +#TODO: •\U0001F469\u200D\U0001F476\U0001F3FC•\U0001F475\U0001F3FE•\U0001F3C3\U0001F3FD•\u270A\U0001F3FC•\u26FD•\U0001F3FC• # man zwj hvy_blk_heart zwj man, woman, man zwj hvy_blk_heart/evar zwj man, woman •\U0001F468\u200D\u2764\u200D\U0001F468•\U0001F469•\U0001F468\u200D\u2764\uFE0F\u200D\U0001F468•\U0001F469• # woman zwj hvy_blk_heart/evar zwj kiss_mark zwj woman, sleuth/fitz-4, horse_racing/fitz-5 @@ -1162,7 +1162,7 @@ Bangkok)• •\U0001F3C3\u200D\u2640\uFE0F•\U0001F3C3\u200D\u2640•\U0001F3C3\U0001F3FD\u200D\u2640\uFE0F•\U0001F3C3\U0001F3FD\u200D\u2640•\u0020• # 9.0 + professions # black heart, fitz 4, squid, fitz4, man dancing /fitz4, mother xmas /fitz4 -•\U0001F5A4•\U0001F3FD•\U0001F991•\U0001F3FD•\U0001F57A\U0001F3FD•\U0001F936\U0001F3FD•\u0020• +#TODO: •\U0001F5A4•\U0001F3FD•\U0001F991•\U0001F3FD•\U0001F57A\U0001F3FD•\U0001F936\U0001F3FD•\u0020• # facepalm, facepalm / fitz4, facepalm + woman +var, facepalm + woman -var, facepalm/fitz4 + woman +var, facepalm/fitz4 + woman -var •\U0001F926•\U0001F926\U0001F3FD•\U0001F926\u200D\u2640\uFE0F•\U0001F926\u200D\u2640•\U0001F926\U0001F3FD\u200D\u2640\uFE0F•\U0001F926\U0001F3FD\u200D\u2640•\u0020• # handball, handball / fitz4, handball + man +var, handball + man -var, handball/fitz4 + man +var, handball/fitz4 + man -var @@ -1218,7 +1218,7 @@ Bangkok)• # woman zwj woman zwj girl zwj girl, woman/fitz-1-2 zwj woman/fitz-4 zwj boy/fitz-6 •\U0001F469\u200D\U0001F469\u200D\U0001F467\u200D\U0001F467•\U0001F469\U0001F3FB\u200D\U0001F469\U0001F3FD\u200D\U0001F466\U0001F3FF• # woman zwj baby/fitz-3, older_woman/fitz-5, runner/fitz-4, raised_fist/fitz-3, fuel_pump, fitz-3 -•\U0001F469\u200D\U0001F476\U0001F3FC•\U0001F475\U0001F3FE•\U0001F3C3\U0001F3FD•\u270A\U0001F3FC•\u26FD•\U0001F3FC• +#TODO: •\U0001F469\u200D\U0001F476\U0001F3FC•\U0001F475\U0001F3FE•\U0001F3C3\U0001F3FD•\u270A\U0001F3FC•\u26FD•\U0001F3FC• # man zwj hvy_blk_heart zwj man, woman, man zwj hvy_blk_heart/evar zwj man, woman •\U0001F468\u200D\u2764\u200D\U0001F468•\U0001F469•\U0001F468\u200D\u2764\uFE0F\u200D\U0001F468•\U0001F469• # woman zwj hvy_blk_heart/evar zwj kiss_mark zwj woman, sleuth/fitz-4, horse_racing/fitz-5 @@ -1237,7 +1237,7 @@ Bangkok)• •\U0001F3C3\u200D\u2640\uFE0F•\U0001F3C3\u200D\u2640•\U0001F3C3\U0001F3FD\u200D\u2640\uFE0F•\U0001F3C3\U0001F3FD\u200D\u2640•\u0020• # 9.0 + professions # black heart, fitz 4, squid, fitz4, man dancing /fitz4, mother xmas /fitz4 -•\U0001F5A4•\U0001F3FD•\U0001F991•\U0001F3FD•\U0001F57A\U0001F3FD•\U0001F936\U0001F3FD•\u0020• +#TODO: •\U0001F5A4•\U0001F3FD•\U0001F991•\U0001F3FD•\U0001F57A\U0001F3FD•\U0001F936\U0001F3FD•\u0020• # facepalm, facepalm / fitz4, facepalm + woman +evar, facepalm + woman -evar, facepalm/fitz4 + woman +evar, facepalm/fitz4 + woman -evar •\U0001F926•\U0001F926\U0001F3FD•\U0001F926\u200D\u2640\uFE0F•\U0001F926\u200D\u2640•\U0001F926\U0001F3FD\u200D\u2640\uFE0F•\U0001F926\U0001F3FD\u200D\u2640•\u0020• # handball, handball / fitz4, handball + man +evar, handball + man -evar, handball/fitz4 + man +evar, handball/fitz4 + man -evar @@ -1436,17 +1436,17 @@ Bangkok)• # Burmese •အ<200>လော<200>င္<200>မ<200>င္<200>တရား<200> -• • • • • •မဟာ<200>ဓမ္မရာဇာ<200>မိ<200>ပတိ<200>လ<200>က္<200>ထ<200>က္<200>တ္<200>ဝ<200>င္<200> •အ<200>င္<200>ဝ<200>နေ<200>ပ္<200>ရ<200>ည္<200>တော္<200>က္<200>ရီး<200>သ<200>ည္<200> •မ<200>င္<200>ရိ<200>မ္<200>မ<200>သ<200>က္<200>ဖ္<200>ရ<200>စ္<200>နေ<200>သ<200>ည္<200>။• •မဏိ<200>ပူ<200>ရ<200> •က<200>သ<200>ည္<200>မ္<200>ယား<200>က<200> •အ<200>င္<200>ဝ<200>နေ<200>ပ္<200>ရ<200>ည္<200>တော္<200>၏• •မ္<200>ရော<200>က္<200>ဘ<200>က္<200>တ<200>လ္<200>ဝ္<200>ဟား<200>ကုိ<200> •တုိ<200>က္<200>ခုိ<200>က္<200>ဖ္<200>ယ<200>က္<200>ဆီး<200>သ<200>ည္<200>။• •အော<200>က္<200>မ္<200>ရ<200>န္<200>မာ<200>နုိ<200>င္<200>ငံ<200> •ဟံ<200>သာ<200>ဝ<200>တီ<200>သား<200>တုိ့<200>က<200>လ<200>ည္<200> •ပု<200>န္<200>က<200>န္<200>သ<200>ည္<200>။• •မတ္တ<200>ရာ<200>အု<200>တ္<200>ဖုိ<200>ရ္<200>ဟိ<200> •က္<200>ဝေ့<200>ရ္<200>ဟ<200>မ္<200>မ္<200>ယား<200>က<200>လ<200>ည္<200> •ထ<200>က္<200>ရ္<200>ဝ<200>သ<200>ည္<200>။• +• •မဟာ<200>ဓမ္မရာဇာ<200>မိ<200>ပတိ<200>လ<200>က္<200>ထ<200>က္<200>တ္<200>ဝ<200>င္<200> •အ<200>င္<200>ဝ<200>နေ<200>ပ္<200>ရ<200>ည္<200>တော္<200>က္<200>ရီး<200>သ<200>ည္<200> •မ<200>င္<200>ရိ<200>မ္<200>မ<200>သ<200>က္<200>ဖ္<200>ရ<200>စ္<200>နေ<200>သ<200>ည္<200>။• •မဏိ<200>ပူ<200>ရ<200> •က<200>သ<200>ည္<200>မ္<200>ယား<200>က<200> •အ<200>င္<200>ဝ<200>နေ<200>ပ္<200>ရ<200>ည္<200>တော္<200>၏• •မ္<200>ရော<200>က္<200>ဘ<200>က္<200>တ<200>လ္<200>ဝ္<200>ဟား<200>ကုိ<200> •တုိ<200>က္<200>ခုိ<200>က္<200>ဖ္<200>ယ<200>က္<200>ဆီး<200>သ<200>ည္<200>။• •အော<200>က္<200>မ္<200>ရ<200>န္<200>မာ<200>နုိ<200>င္<200>ငံ<200> •ဟံ<200>သာ<200>ဝ<200>တီ<200>သား<200>တုိ့<200>က<200>လ<200>ည္<200> •ပု<200>န္<200>က<200>န္<200>သ<200>ည္<200>။• •မတ္တ<200>ရာ<200>အု<200>တ္<200>ဖုိ<200>ရ္<200>ဟိ<200> •က္<200>ဝေ့<200>ရ္<200>ဟ<200>မ္<200>မ္<200>ယား<200>က<200>လ<200>ည္<200> •ထ<200>က္<200>ရ္<200>ဝ<200>သ<200>ည္<200>။• • -• • • • •ထုိ<200>အ<200>ခ္<200>ယိ<200>န္<200>တ္<200>ဝ<200>င္<200> •မု<200>ဆုိး<200>ဖုိ<200>ရ္<200>ဝာ<200>သူ<200>က္<200>ရီး<200> •အော<200>င္<200>ဇေ<200>ယ္<200>ယ<200>သ<200>ည္<200> •မိမိ<200>၏•ရ္<200>ဝာ<200>ကုိ<200> •လုံ<200>ခ္<200>ရုံ<200>အော<200>င္<200>ထ<200>န္<200>လုံး<200>တ<200>ပ္<200>မ္<200>ယား<200>ကာ<200>ရ<200>သ<200>ည္<200>။• •အနီး<200>အ<200>ပား<200> •က္<200>ယေး<200>ရ္<200>ဝာ<200> •လေး<200>ဆ<200>ယ့္<200>ခ္<200>ရော<200>က္<200>ရ္<200>ဝာ<200>ကုိ<200> •သိ<200>မ္း<200>သ္<200>ဝ<200>င္<200>ထား<200>သ<200>ည္<200>။• •မ<200>က္<200>ရာ<200>မီ<200>ပ<200>င္<200> •အ<200>င္<200>ဝ<200>နေ<200>ပ္<200>ရ<200>ည္<200>တော္<200>က္<200>ရီး<200>သ<200>ည္<200> •ဟံ<200>သာ<200>ဝ<200>တီ<200>တ<200>ပ္<200>မ္<200>ယား<200> •လ<200>က္<200>တ္<200>ဝ<200>င္<200>သ<200>က္<200>ဆ<200>င္<200>ရ<200>တော့<200>သ<200>ည္<200>။• +• •ထုိ<200>အ<200>ခ္<200>ယိ<200>န္<200>တ္<200>ဝ<200>င္<200> •မု<200>ဆုိး<200>ဖုိ<200>ရ္<200>ဝာ<200>သူ<200>က္<200>ရီး<200> •အော<200>င္<200>ဇေ<200>ယ္<200>ယ<200>သ<200>ည္<200> •မိမိ<200>၏•ရ္<200>ဝာ<200>ကုိ<200> •လုံ<200>ခ္<200>ရုံ<200>အော<200>င္<200>ထ<200>န္<200>လုံး<200>တ<200>ပ္<200>မ္<200>ယား<200>ကာ<200>ရ<200>သ<200>ည္<200>။• •အနီး<200>အ<200>ပား<200> •က္<200>ယေး<200>ရ္<200>ဝာ<200> •လေး<200>ဆ<200>ယ့္<200>ခ္<200>ရော<200>က္<200>ရ္<200>ဝာ<200>ကုိ<200> •သိ<200>မ္း<200>သ္<200>ဝ<200>င္<200>ထား<200>သ<200>ည္<200>။• •မ<200>က္<200>ရာ<200>မီ<200>ပ<200>င္<200> •အ<200>င္<200>ဝ<200>နေ<200>ပ္<200>ရ<200>ည္<200>တော္<200>က္<200>ရီး<200>သ<200>ည္<200> •ဟံ<200>သာ<200>ဝ<200>တီ<200>တ<200>ပ္<200>မ္<200>ယား<200> •လ<200>က္<200>တ္<200>ဝ<200>င္<200>သ<200>က္<200>ဆ<200>င္<200>ရ<200>တော့<200>သ<200>ည္<200>။• • -• • • • •အ<200>င္<200>ဝ<200>ကုိ<200> •သိ<200>မ္<200>ပုိ<200>က္<200>ပ္<200>ရီး<200>သော<200> •ဟံ<200>သာ<200>ဝ<200>တီ<200>တ<200>ပ္<200>မ္<200>ယား<200>သ<200>ည္<200> •မ္<200>ရော<200>က္<200>ဘ<200>က္<200>တ<200>လ္<200>ဝ္<200>ဟား<200>က္<200>ယေး<200>ရ္<200>ဝာ<200>မ္<200>ယား<200>ကုိ<200> •သစ္စာ<200>ခံ<200>ခုိ<200>င္<200>ရ<200>န္<200> •လာ<200>က္<200>ရ<200>ရာ<200> •မု<200>ဆုိး<200>ဖုိ<200>ရ္<200>ဝာ<200>သုိ့<200> •ရော<200>က္<200>ရ္<200>ဟိ<200>လာ<200>သ<200>ည္<200>။• •တ<200>ခ္<200>ယိ<200>န္<200>တ<200>ည္<200>မ္<200>ဟာ<200>ပ<200>င္<200> •က္<200>ဝေ့<200>ရ္<200>ဟ<200>မ္<200>မ္<200>ယား<200>က<200>လ<200>ည္<200> •သစ္စာ<200>ခံ<200>ခုိ<200>င္<200>ရ<200>န္<200> •ရော<200>က္<200>ရ္<200>ဟိ<200>လာ<200>သ<200>ည္<200>။• •ဦး<200>အော<200>င္<200>ဇေ<200>ယ္<200>ယ<200>သ<200>ည္<200> •အ<200>ဖ္<200>ဝဲ့<200>န္<200>ဟ<200>စ္<200>ဖ္<200>ဝဲ့<200>ကုိ<200> •ခ္<200>ရေ<200>ငံ<200>စ္<200>ဝာ<200> •ဆ<200>က္<200>ဆံ<200>သ<200>ည္<200>။• •မ<200>ည္<200>သူ့<200>သ<200>စ္<200>စာ<200>ကုိ<200>မ္<200>ယ္<200>ဟ<200> •ခံ<200>ယူ<200>ခ္<200>ရ<200>င္<200>မ<200>ပ္<200>ရု<200>ပေ<200>။• •ဟံ<200>သာ<200>ဝ<200>တီ<200>တ<200>ပ္<200>ဖ္<200>ဝဲ့<200>ကုိ<200> •အ<200>ပ္<200>ရ<200>န္<200>ခ<200>ရီး<200>တ္<200>ဝ<200>င္<200> •လ<200>မ္<200>မ္<200>ဟ<200>ဖ္<200>ရ<200>တ္<200>၍• •တုိ<200>က္<200>ခုိ<200>က္<200>သ<200>ည္<200>။• •ဟံ<200>သာ<200>ဝ<200>တီ<200>တ<200>ပ္<200>ဖ္<200>ဝဲ့<200>မ္<200>ယား<200> •အထိ<200>အ<200>ခုိ<200>က္<200>အ<200>က္<200>ယ<200>အ<200>ဆုံး<200>မ္<200>ယား<200>စ္<200>ဝာ<200>ဖ္<200>ရ<200>င္<200> •ပ္<200>ရ<200>န္<200>ရ<200>သ<200>ည္<200>။• +• •အ<200>င္<200>ဝ<200>ကုိ<200> •သိ<200>မ္<200>ပုိ<200>က္<200>ပ္<200>ရီး<200>သော<200> •ဟံ<200>သာ<200>ဝ<200>တီ<200>တ<200>ပ္<200>မ္<200>ယား<200>သ<200>ည္<200> •မ္<200>ရော<200>က္<200>ဘ<200>က္<200>တ<200>လ္<200>ဝ္<200>ဟား<200>က္<200>ယေး<200>ရ္<200>ဝာ<200>မ္<200>ယား<200>ကုိ<200> •သစ္စာ<200>ခံ<200>ခုိ<200>င္<200>ရ<200>န္<200> •လာ<200>က္<200>ရ<200>ရာ<200> •မု<200>ဆုိး<200>ဖုိ<200>ရ္<200>ဝာ<200>သုိ့<200> •ရော<200>က္<200>ရ္<200>ဟိ<200>လာ<200>သ<200>ည္<200>။• •တ<200>ခ္<200>ယိ<200>န္<200>တ<200>ည္<200>မ္<200>ဟာ<200>ပ<200>င္<200> •က္<200>ဝေ့<200>ရ္<200>ဟ<200>မ္<200>မ္<200>ယား<200>က<200>လ<200>ည္<200> •သစ္စာ<200>ခံ<200>ခုိ<200>င္<200>ရ<200>န္<200> •ရော<200>က္<200>ရ္<200>ဟိ<200>လာ<200>သ<200>ည္<200>။• •ဦး<200>အော<200>င္<200>ဇေ<200>ယ္<200>ယ<200>သ<200>ည္<200> •အ<200>ဖ္<200>ဝဲ့<200>န္<200>ဟ<200>စ္<200>ဖ္<200>ဝဲ့<200>ကုိ<200> •ခ္<200>ရေ<200>ငံ<200>စ္<200>ဝာ<200> •ဆ<200>က္<200>ဆံ<200>သ<200>ည္<200>။• •မ<200>ည္<200>သူ့<200>သ<200>စ္<200>စာ<200>ကုိ<200>မ္<200>ယ္<200>ဟ<200> •ခံ<200>ယူ<200>ခ္<200>ရ<200>င္<200>မ<200>ပ္<200>ရု<200>ပေ<200>။• •ဟံ<200>သာ<200>ဝ<200>တီ<200>တ<200>ပ္<200>ဖ္<200>ဝဲ့<200>ကုိ<200> •အ<200>ပ္<200>ရ<200>န္<200>ခ<200>ရီး<200>တ္<200>ဝ<200>င္<200> •လ<200>မ္<200>မ္<200>ဟ<200>ဖ္<200>ရ<200>တ္<200>၍• •တုိ<200>က္<200>ခုိ<200>က္<200>သ<200>ည္<200>။• •ဟံ<200>သာ<200>ဝ<200>တီ<200>တ<200>ပ္<200>ဖ္<200>ဝဲ့<200>မ္<200>ယား<200> •အထိ<200>အ<200>ခုိ<200>က္<200>အ<200>က္<200>ယ<200>အ<200>ဆုံး<200>မ္<200>ယား<200>စ္<200>ဝာ<200>ဖ္<200>ရ<200>င္<200> •ပ္<200>ရ<200>န္<200>ရ<200>သ<200>ည္<200>။• • -• • • • •ဟံ<200>သာ<200>ဝ<200>တီ<200>တ<200>ပ္<200>မ္<200>ယား<200>သ<200>ည္<200> •မု<200>ဆုိး<200>ဖုိ<200>ရ္<200>ဝာ<200>ကုိ<200> •လာ<200>ရော<200>က္<200>တုိ<200>က္<200>ခုိ<200>က္<200>က္<200>ရ<200>ပ္<200>ရ<200>န္<200>သ<200>ည္<200>။• •ဦး<200>အော<200>င္<200>ဇေ<200>ယ္<200>ယ<200>သ<200>ည္<200> •သ္<200>ဝေး<200>သော<200>က္<200>ရဲ<200>ဘော္<200> •ခ္<200>ရော<200>က္<200>က္<200>ယိ<200>ပ္<200>ရ္<200>ဟ<200>စ္<200>ယော<200>က္<200>န္<200>ဟ<200>င္<200>အတူ<200> •ဦးစီး<200>ကာ<200>အော<200>င္<200>မ္<200>ရ<200>င္<200>စ္<200>ဝာ<200>ခု<200>ခံ<200>တ္<200>ဝ<200>န္<200>လ္<200>ဟ<200>န္<200>နုိ<200>င္<200>ခဲ့<200>သ<200>ည္<200>။• •ထုိ့<200>နော<200>က္<200> •ဦး<200>အော<200>င္<200>ဇေ<200>ယ္<200>ယ<200>သ<200>ည္<200> •မ္<200>ရော<200>က္<200>ဘ<200>က္<200>တ<200>လ္<200>ဝ္<200>ဟား<200>ရ္<200>ဟိ<200> •ရ္<200>ဟ<200>မ္<200>မ္<200>ယား<200>န္<200>ဟ<200>င္<200> •မ္<200>ရ<200>န္<200>မာ<200>မ္<200>ယား<200>ကုိ<200>လ<200>ည္<200> •ဆ<200>က္<200>သ္<200>ဝ<200>ယ္<200>စ<200>ည္<200>ရုံး<200>နုိ<200>င္<200>ခဲ့<200>သ<200>ည္<200>။• •ဤ<200>သုိ့<200>ဖ္<200>ရ<200>င္<200> •ဦး<200>အော<200>င္<200>ဇေ<200>ယ္<200>ယ<200>၏• •အ<200>ရ္<200>ဟိ<200>န္<200>အ<200>ဝာ<200> •မ္<200>ရ<200>င္<200>မား<200>လာ<200>လေ<200>သ<200>ည္<200>။• +• •ဟံ<200>သာ<200>ဝ<200>တီ<200>တ<200>ပ္<200>မ္<200>ယား<200>သ<200>ည္<200> •မု<200>ဆုိး<200>ဖုိ<200>ရ္<200>ဝာ<200>ကုိ<200> •လာ<200>ရော<200>က္<200>တုိ<200>က္<200>ခုိ<200>က္<200>က္<200>ရ<200>ပ္<200>ရ<200>န္<200>သ<200>ည္<200>။• •ဦး<200>အော<200>င္<200>ဇေ<200>ယ္<200>ယ<200>သ<200>ည္<200> •သ္<200>ဝေး<200>သော<200>က္<200>ရဲ<200>ဘော္<200> •ခ္<200>ရော<200>က္<200>က္<200>ယိ<200>ပ္<200>ရ္<200>ဟ<200>စ္<200>ယော<200>က္<200>န္<200>ဟ<200>င္<200>အတူ<200> •ဦးစီး<200>ကာ<200>အော<200>င္<200>မ္<200>ရ<200>င္<200>စ္<200>ဝာ<200>ခု<200>ခံ<200>တ္<200>ဝ<200>န္<200>လ္<200>ဟ<200>န္<200>နုိ<200>င္<200>ခဲ့<200>သ<200>ည္<200>။• •ထုိ့<200>နော<200>က္<200> •ဦး<200>အော<200>င္<200>ဇေ<200>ယ္<200>ယ<200>သ<200>ည္<200> •မ္<200>ရော<200>က္<200>ဘ<200>က္<200>တ<200>လ္<200>ဝ္<200>ဟား<200>ရ္<200>ဟိ<200> •ရ္<200>ဟ<200>မ္<200>မ္<200>ယား<200>န္<200>ဟ<200>င္<200> •မ္<200>ရ<200>န္<200>မာ<200>မ္<200>ယား<200>ကုိ<200>လ<200>ည္<200> •ဆ<200>က္<200>သ္<200>ဝ<200>ယ္<200>စ<200>ည္<200>ရုံး<200>နုိ<200>င္<200>ခဲ့<200>သ<200>ည္<200>။• •ဤ<200>သုိ့<200>ဖ္<200>ရ<200>င္<200> •ဦး<200>အော<200>င္<200>ဇေ<200>ယ္<200>ယ<200>၏• •အ<200>ရ္<200>ဟိ<200>န္<200>အ<200>ဝာ<200> •မ္<200>ရ<200>င္<200>မား<200>လာ<200>လေ<200>သ<200>ည္<200>။• • -• • • • •ဦး<200>အော<200>င္<200>ဇေ<200>ယ္<200>ယ<200>သ<200>ည္<200> •အ<200>လော<200>င္<200>မ<200>င္<200>တ<200>ရား<200>ဘ္<200>ဝဲ့<200>ကုိ<200> •ခံယူ<200>ကာ<200> •ကု<200>န္<200>ဘော<200>င္<200>မ<200>င္<200>ဆ<200>က္<200>ကုိ<200>စ<200>တ<200>င္<200>တ<200>ည္<200>ထော<200>င္<200>သ<200>ည္<200>။• •မု<200>ဆုိး<200>ဖုိ<200>ရ္<200>ဝာ<200>ကုိ<200> •ရ္<200>ဝ္<200>ဟ<200>ဝေ<200>ဘုိ<200>ဟု<200> •သ<200>မု<200>တ္<200>ကာ<200> •မ္<200>ရုိ့<200>န<200>န္<200>တ<200>ည္<200>သ<200>ည္<200>။• •န<200>န္<200>တ<200>ည္<200>သ<200>က္<200>က<200>ရာ<200>ဇ္<200>ဖ္<200>ရ<200>စ္<200>သော<200> •၁၁၁၅<100> •ခု<200>ကုိ<200> •ဥ<200>ဩ<200>အော္<200>မ္<200>ရ<200>ည္<200> •ကု<200>န္<200>ဘော<200>င္<200>တ<200>ည္<200>ဟု<200> •အ<200>မ္<200>ဟ<200>တ္<200>အ<200>သား<200>ပ္<200>ရု<200>က္<200>ရ<200>သ<200>ည္<200>။• +• •ဦး<200>အော<200>င္<200>ဇေ<200>ယ္<200>ယ<200>သ<200>ည္<200> •အ<200>လော<200>င္<200>မ<200>င္<200>တ<200>ရား<200>ဘ္<200>ဝဲ့<200>ကုိ<200> •ခံယူ<200>ကာ<200> •ကု<200>န္<200>ဘော<200>င္<200>မ<200>င္<200>ဆ<200>က္<200>ကုိ<200>စ<200>တ<200>င္<200>တ<200>ည္<200>ထော<200>င္<200>သ<200>ည္<200>။• •မု<200>ဆုိး<200>ဖုိ<200>ရ္<200>ဝာ<200>ကုိ<200> •ရ္<200>ဝ္<200>ဟ<200>ဝေ<200>ဘုိ<200>ဟု<200> •သ<200>မု<200>တ္<200>ကာ<200> •မ္<200>ရုိ့<200>န<200>န္<200>တ<200>ည္<200>သ<200>ည္<200>။• •န<200>န္<200>တ<200>ည္<200>သ<200>က္<200>က<200>ရာ<200>ဇ္<200>ဖ္<200>ရ<200>စ္<200>သော<200> •၁၁၁၅<100> •ခု<200>ကုိ<200> •ဥ<200>ဩ<200>အော္<200>မ္<200>ရ<200>ည္<200> •ကု<200>န္<200>ဘော<200>င္<200>တ<200>ည္<200>ဟု<200> •အ<200>မ္<200>ဟ<200>တ္<200>အ<200>သား<200>ပ္<200>ရု<200>က္<200>ရ<200>သ<200>ည္<200>။• • -• • • • •အ<200>လော<200>င္<200>မ<200>င္<200>တရား<200>သ<200>ည္<200> •ဧရာ<200>ဝ<200>တီ<200>န္<200>ဟ<200>င္<200>ခ္<200>ယ<200>င္<200>တ္<200>ဝ<200>င္<200> •မ္<200>ရ<200>စ္<200>န္<200>ဟ<200>စ္<200>သ္<200>ဝ<200>ယ္<200>အ<200>က္<200>ရား<200> •ဒေ<200>သ<200>မ္<200>ယား<200>ကုိ<200>အ<200>ခုိ<200>င္<200>အ<200>မာ<200> •စု<200>စ<200>ည္<200>ပ္<200>ရီး<200>နော<200>က္<200> •အ<200>င္<200>ဝ<200>ကုိ<200> •တုိ<200>က္<200>ခုိ<200>က္<200>အော<200>င္<200>မ္<200>ရ<200>င္<200>သ<200>ည္<200>။• •ထုိ<200>နော<200>က္<200>တ္<200>ဝ<200>င္<200>ပ္<200>ရ<200>ည္<200>၊• •လ္<200>ဝ<200>န္<200>ဆေး<200>၊• •ဒ<200>ဂုံ<200>မ္<200>ရုိ့<200>မ္<200>ယား<200>ကုိ<200> •သိ<200>မ္<200>ပုိ<200>က္<200>သ<200>ည္<200>။• •လ္<200>ဝ<200>န္<200>ဆေး<200> •ကုိ<200>မ္<200>ရ<200>န္<200>အော<200>င္<200>ဟူ<200>၍• •သ<200>မု<200>တ္<200>သ<200>ည္<200>။• •ဒ<200>ဂုံ<200>ကုိ<200>ရ<200>န္<200>ကု<200>န္<200>ဟူ<200>၍• •သ<200>မု<200>တ္<200>ထ<200>သ<200>ည္<200>။• +• •အ<200>လော<200>င္<200>မ<200>င္<200>တရား<200>သ<200>ည္<200> •ဧရာ<200>ဝ<200>တီ<200>န္<200>ဟ<200>င္<200>ခ္<200>ယ<200>င္<200>တ္<200>ဝ<200>င္<200> •မ္<200>ရ<200>စ္<200>န္<200>ဟ<200>စ္<200>သ္<200>ဝ<200>ယ္<200>အ<200>က္<200>ရား<200> •ဒေ<200>သ<200>မ္<200>ယား<200>ကုိ<200>အ<200>ခုိ<200>င္<200>အ<200>မာ<200> •စု<200>စ<200>ည္<200>ပ္<200>ရီး<200>နော<200>က္<200> •အ<200>င္<200>ဝ<200>ကုိ<200> •တုိ<200>က္<200>ခုိ<200>က္<200>အော<200>င္<200>မ္<200>ရ<200>င္<200>သ<200>ည္<200>။• •ထုိ<200>နော<200>က္<200>တ္<200>ဝ<200>င္<200>ပ္<200>ရ<200>ည္<200>၊• •လ္<200>ဝ<200>န္<200>ဆေး<200>၊• •ဒ<200>ဂုံ<200>မ္<200>ရုိ့<200>မ္<200>ယား<200>ကုိ<200> •သိ<200>မ္<200>ပုိ<200>က္<200>သ<200>ည္<200>။• •လ္<200>ဝ<200>န္<200>ဆေး<200> •ကုိ<200>မ္<200>ရ<200>န္<200>အော<200>င္<200>ဟူ<200>၍• •သ<200>မု<200>တ္<200>သ<200>ည္<200>။• •ဒ<200>ဂုံ<200>ကုိ<200>ရ<200>န္<200>ကု<200>န္<200>ဟူ<200>၍• •သ<200>မု<200>တ္<200>ထ<200>သ<200>ည္<200>။• # japanese diff --git a/icu4c/source/test/testdata/regextst.txt b/icu4c/source/test/testdata/regextst.txt index a3f8dc78f5b..977a7b34590 100644 --- a/icu4c/source/test/testdata/regextst.txt +++ b/icu4c/source/test/testdata/regextst.txt @@ -252,7 +252,7 @@ # Unicode word boundary mode # "(?w).*?\b" v "<0>hello, world" -"(?w).*?(\b.+?\b).*" v "<0><1> 123.45 " +"(?w).*?(\b.+?\b).*" v "<0><1> 123.45 " "(?w).*?(\b\d.*?\b).*" v "<0> <1>123.45 " ".*?(\b.+?\b).*" "<0> <1>123.45 " "(?w:.*?(\b\d.*?\b).*)" v "<0> <1>123.45 "