From 2f87cf4c46e23e2f61af2b9b64b75e16544e985b Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Fri, 25 Aug 2017 22:46:12 +0000 Subject: [PATCH] ICU-10524 normalization one-way mapping with trailing ccc>1 has no compose-boundary-after X-SVN-Rev: 40355 --- icu4c/source/common/norm2_nfc_data.h | 40 +++++++++--------- icu4c/source/data/in/nfc.nrm | Bin 35384 -> 35384 bytes icu4c/source/data/in/nfkc.nrm | Bin 54300 -> 54300 bytes icu4c/source/data/in/nfkc_cf.nrm | Bin 51928 -> 51928 bytes icu4c/source/data/in/uts46.nrm | Bin 61068 -> 61068 bytes icu4c/source/test/intltest/tstnorm.cpp | 18 ++++++++ icu4c/source/test/intltest/tstnorm.h | 1 + icu4c/source/tools/gennorm2/n2builder.cpp | 16 +++++-- icu4c/source/tools/gennorm2/n2builder.h | 3 +- icu4j/main/shared/data/icudata.jar | 4 +- .../icu/dev/test/normalizer/BasicTest.java | 12 ++++++ 11 files changed, 68 insertions(+), 26 deletions(-) diff --git a/icu4c/source/common/norm2_nfc_data.h b/icu4c/source/common/norm2_nfc_data.h index a8e33878dfb..8f5c4346db5 100644 --- a/icu4c/source/common/norm2_nfc_data.h +++ b/icu4c/source/common/norm2_nfc_data.h @@ -300,21 +300,21 @@ static const uint16_t norm2_nfc_data_trieIndex[9776]={ 1,1,1,1,0x864,0x198d,1,1,1,1,1,1,0x868,0x1993,1,0x86c, 0x1999,1,1,1,1,1,1,1,0xfc0e,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,0xfe12,1,1,1,0xffcc,0xffb8,0xffcc, -0xffcc,1,1,1,0x29dd,0x29e3,0x29e9,0x29ef,0x29f5,0x29fb,0x2a01,0x2a07,1,1,1,1, +0xffcc,1,1,1,0x29dc,0x29e2,0x29e8,0x29ee,0x29f4,0x29fa,0x2a00,0x2a06,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,0xfe0e,1,0xfc00,1,1,1,1,1, 1,1,1,0x870,1,1,1,0x199f,0x19a5,0xfe12,1,1,1,1,1,1, -1,1,1,0xfc00,1,1,1,1,0x2a0d,0x2a13,1,0x2a19,1,1,1,1, -1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x2a1f, -1,1,0x2a25,1,1,1,1,1,0xfe0e,1,1,1,1,1,1,1, +1,1,1,0xfc00,1,1,1,1,0x2a0c,0x2a12,1,0x2a18,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x2a1e, +1,1,0x2a24,1,1,1,1,1,0xfe0e,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,0xfe12,1,1,1,1,1,1, -1,1,1,1,1,0x2a2b,0x2a31,0x2a37,1,1,0x2a3d,1,1,1,1,1, +1,1,1,1,1,0x2a2a,0x2a30,0x2a36,1,1,0x2a3c,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,0xfe0e,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,0xfe12,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0x878, 0x19ab,1,1,0x19b1,0x19b7,0xfe12,1,1,1,1,1,1,1,1,0xfc00,0xfc00, -1,1,1,1,0x2a43,0x2a49,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,0x2a42,0x2a48,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,0x884,1,0x19bd,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,0xfc00,1, @@ -342,7 +342,7 @@ static const uint16_t norm2_nfc_data_trieIndex[9776]={ 1,1,1,0x2a4f,1,1,1,1,1,1,1,1,1,0x2a55,1,1, 1,1,0x2a5b,1,1,1,1,0x2a61,1,1,1,1,0x2a67,1,1,1, 1,1,1,1,1,1,1,1,1,0x2a6d,1,1,1,1,1,1, -1,0xff02,0xff04,0x3c40,0xff08,0x3c48,0x2a73,1,0x2a79,1,0xff04,0xff04,0xff04,0xff04,1,1, +1,0xff02,0xff04,0x3c40,0xff08,0x3c48,0x2a72,1,0x2a78,1,0xff04,0xff04,0xff04,0xff04,1,1, 0xff04,0x3c50,0xffcc,0xffcc,0xfe12,1,0xffcc,0xffcc,1,1,1,1,1,1,1,1, 1,1,1,0x2a7f,1,1,1,1,1,1,1,1,1,0x2a85,1,1, 1,1,0x2a8b,1,1,1,1,0x2a91,1,1,1,1,0x2a97,1,1,1, @@ -406,15 +406,15 @@ static const uint16_t norm2_nfc_data_trieIndex[9776]={ 0x21ef,0x21f9,0x2203,0x220d,0x10d8,0x10e6,0x2217,0x2221,0x222b,0x2235,1,1,0x10f4,0x1102,0x223f,0x2249, 0x2253,0x225d,1,1,0x1110,0x1122,0x2267,0x2271,0x227b,0x2285,0x228f,0x2299,1,0x1134,1,0x22a3, 1,0x22ad,1,0x22b7,0x1146,0x115c,0x1174,0x1182,0x1190,0x119e,0x11ac,0x11ba,0x11c6,0x11dc,0x11f4,0x1202, -0x1210,0x121e,0x122c,0x123a,0x1246,0x3b8e,0x22bf,0x3b97,0x1250,0x3b9e,0x22c5,0x3ba7,0x22cb,0x3baf,0x22d1,0x3bb7, +0x1210,0x121e,0x122c,0x123a,0x1246,0x3b8e,0x22bf,0x3b96,0x1250,0x3b9e,0x22c5,0x3ba6,0x22cb,0x3bae,0x22d1,0x3bb6, 0x125a,0x3bbe,1,1,0x22d8,0x22e2,0x22f1,0x2301,0x2311,0x2321,0x2331,0x2341,0x234c,0x2356,0x2365,0x2375, 0x2385,0x2395,0x23a5,0x23b5,0x23c0,0x23ca,0x23d9,0x23e9,0x23f9,0x2409,0x2419,0x2429,0x2434,0x243e,0x244d,0x245d, 0x246d,0x247d,0x248d,0x249d,0x24a8,0x24b2,0x24c1,0x24d1,0x24e1,0x24f1,0x2501,0x2511,0x251c,0x2526,0x2535,0x2545, -0x2555,0x2565,0x2575,0x2585,0x258f,0x2595,0x259d,0x25a4,0x25ad,1,0x1264,0x25b7,0x25bf,0x25c5,0x25cb,0x3bc7, -0x25d0,1,0x2aa2,0x8f0,1,0x25d7,0x25df,0x25e6,0x25ef,1,0x126e,0x25f9,0x2601,0x3bcf,0x2607,0x3bd7, -0x260c,0x2613,0x2619,0x261f,0x2625,0x262b,0x2633,0x3be1,1,1,0x263b,0x2643,0x264b,0x2651,0x2657,0x3beb, -1,0x265d,0x2663,0x2669,0x266f,0x2675,0x267d,0x3bf5,0x2685,0x268b,0x2691,0x2699,0x26a1,0x26a7,0x26ad,0x3bff, -0x26b3,0x26b9,0x3c07,0x2aa7,1,1,0x26c1,0x26c8,0x26d1,1,0x1278,0x26db,0x26e3,0x3c0f,0x26e9,0x3c17, +0x2555,0x2565,0x2575,0x2585,0x258f,0x2595,0x259d,0x25a4,0x25ad,1,0x1264,0x25b7,0x25bf,0x25c5,0x25cb,0x3bc6, +0x25d0,1,0x2aa2,0x8f0,1,0x25d7,0x25df,0x25e6,0x25ef,1,0x126e,0x25f9,0x2601,0x3bce,0x2607,0x3bd6, +0x260c,0x2613,0x2619,0x261f,0x2625,0x262b,0x2633,0x3be0,1,1,0x263b,0x2643,0x264b,0x2651,0x2657,0x3bea, +1,0x265d,0x2663,0x2669,0x266f,0x2675,0x267d,0x3bf4,0x2685,0x268b,0x2691,0x2699,0x26a1,0x26a7,0x26ad,0x3bfe, +0x26b3,0x26b9,0x3c06,0x2aa7,1,1,0x26c1,0x26c8,0x26d1,1,0x1278,0x26db,0x26e3,0x3c0e,0x26e9,0x3c16, 0x26ee,0x2aab,0x8fc,1,0xfa09,0xfa09,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,0xffcc,0xffcc,0xfe02,0xfe02,0xffcc,0xffcc,0xffcc,0xffcc,0xfe02,0xfe02,0xfe02,0xffcc, @@ -512,10 +512,10 @@ static const uint16_t norm2_nfc_data_trieIndex[9776]={ 0x311b,0x3009,0x311f,0x3123,0x3127,0x312b,0x312f,0x3011,0x2f09,0x3133,0x3015,0x3137,0x3019,0x313b,0x2ae1,0x313f, 0x3145,0x314b,0x3151,0x3155,0x3159,0x315d,0x3163,0x3169,0x316f,0x3173,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,1,1,1,0x3177,0xfe34,0x317d,1,1,1,1, -1,1,1,1,1,1,0x3183,0x3189,0x3191,0x319b,0x31a3,0x31a9,0x31af,0x31b5,0x31bb,0x31c1, -0x31c7,0x31cd,0x31d3,1,0x31d9,0x31df,0x31e5,0x31eb,0x31f1,1,0x31f7,1,0x31fd,0x3203,1,0x3209, -0x320f,1,0x3215,0x321b,0x3221,0x3227,0x322d,0x3233,0x3239,0x323f,0x3245,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,0x3176,0xfe34,0x317c,1,1,1,1, +1,1,1,1,1,1,0x3182,0x3188,0x3190,0x319a,0x31a2,0x31a8,0x31ae,0x31b4,0x31ba,0x31c0, +0x31c6,0x31cc,0x31d2,1,0x31d8,0x31de,0x31e4,0x31ea,0x31f0,1,0x31f6,1,0x31fc,0x3202,1,0x3208, +0x320e,1,0x3214,0x321a,0x3220,0x3226,0x322c,0x3232,0x3238,0x323e,0x3244,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc, 0xffcc,0xffcc,0xffcc,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffcc,0xffcc,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, @@ -560,13 +560,13 @@ static const uint16_t norm2_nfc_data_trieIndex[9776]={ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,0xfe02,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, -1,1,1,1,1,1,0x324b,0x3255,0x3269,0x3281,0x3299,0x32b1,0x32c9,0xffb0,0xffb0,0xfe02, +1,1,1,1,1,1,0x324a,0x3254,0x3268,0x3280,0x3298,0x32b0,0x32c8,0xffb0,0xffb0,0xfe02, 0xfe02,0xfe02,1,1,1,0xffc4,0xffb0,0xffb0,0xffb0,0xffb0,0xffb0,1,1,1,1,1, 1,1,1,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,0xffb8,1,1,0xffcc,0xffcc,0xffcc, 0xffcc,0xffcc,0xffb8,0xffb8,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,0xffcc,0xffcc,0xffcc,0xffcc,1,1, -1,1,1,1,1,1,1,1,1,1,1,0x32d7,0x32e1,0x32f5,0x330d,0x3325, -0x333d,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +1,1,1,1,1,1,1,1,1,1,1,0x32d6,0x32e0,0x32f4,0x330c,0x3324, +0x333c,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,0xffcc,0xffcc,0xffcc,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, diff --git a/icu4c/source/data/in/nfc.nrm b/icu4c/source/data/in/nfc.nrm index d92f277c25df9f5f9f054daf0f81b35be0d328a5..bb2e35639ad5bcfca4b5eca2c6d67c9f78d39520 100644 GIT binary patch delta 385 zcmdlng=xnWrVSm+{C6}TX}-{Wr};(mmnMT2+hj*m)yWq`MJ8WRc3|hx64GL1kN}Bs zF*3*j*^CS-T9bD<=u8gKlHk_TGSD&u3)(=niA}zsQo-z`DyrCg zQR6SW;56$1p?TIvm6lnbR$6C$QEA)c`v&!l$0o-cN~fN)W>>pr&7&r)CaET`rmCi` zW~}zW8f2-pnzNd>TA*6E+AAPGRxMdAQ!QVuRIOI+i*>77x7tLtnQ9BwmaDB*`)9pb zZKoQW&2q524yv6{yQl`VTLdTmuh00?;9HPu$38_{HrmXd_YvhtI4p#aDw41 z!$pQG4A&WMG2CT%!0?#i8N*A4j0`sn?-@QZd}a6n$o>Yze+-$77#TQ>_<*E{k(7~w zk(!Z?k&%&wk)4r?(PRT{gU!qA(pY)Dj6#euj2et)7;P{*F>pz&7&r)CaET`rmCi` zW~}zm8f2-pnzNd>TA*6E+G`*`RxMdAQ!QVuRIOI+t97edx7tLtnQ9BwmaDB*`)|Ej zZKoQ$&2q524yv6{yQs#+1hW8Dh8ooCC2xNZ;;=hK>MvM%cM*Kih)JWP$ z(Ma7$*T~q&(#YP()o8MTw!!9Qc4@4<-bSHDnMRF9GmSPHot!M`GMD?h(LD360X?`5hEx@jb3Jiy&y%Sz+TWO6H@EC#gFHl=Q;2k{{K)f8|r2I z!uEm&Cu0g4O45{YyPYl0R_DQlKcauIpJme)qHL6pAmY#p{b)`ZV;51f;s1Znq*iBu z4(489mn$BOp0XHW=TM!o1d>Zg@CtLf9F>n(Rt*r+`+a-lVv_#lSpwg5-N zuB@~nG>qbj$B{&#!ZwRO2m9GkG;U<4-U>Td%Nkd;OLPTv7B$bFA5ii$73HvFr*hP> zSLt-@r)?|S+B%dD5aWip8265gb0@($a0xJ`wV79e4!V>(;GA+FeCH-OGyf1g0fXSO zGPA>5@DY3gW8gQqX5=}%jnC(&01MOLfU*EChIz0ME`y~I!?02b5x`~n5SQQ`!QH__^mSE^#jLjBPHdxpstU?_)iA!JKEZ=HqQ=x=JgknY(`u@grG3Uo z`-XqvDQxkidu*OOkIPf)snRxUyEN=Mq}Ny1TT?a@_jCijAubY*kalu{oFUz$hg>0j zWPscw4@iVOC5-7Mc|+clPvooNe;Aq|)5bYVNEH}UhF}wN1&2^16bo*lT&SRPYp>=$ z)*fqpLXw+8lH0;Dmlt{%Fm3&18*POcgW5Y|$Yinpd}#nWP8mPBMKo z&0CW4QhX=I^-6E2+0rc~pf7wI%c4?D8j={(u%Wm#B8^Jp(xhbeS$*lgg}!WGfv>%> zz43&>DT9zfPZBEmU$fE1sLsnT80MP(xBhg>+&BKXf5M+37s(ZJfL_{uaZZ!GS8kU> ea=#pvjd9o0u3{^Fzs0Rb8k#K3D}9;vZO=cO?HySF delta 811 zcmXYse@GKy9L9HDW9qU}(J3oz+WX4h?J{v9>s>#VN$=q#Z#%J7!nr#W5s5)Y5wbr@ zNGeEP!XiZ?gkng^Au3TA5f!q`4k;0}h|wP+F)Bo2+PTGt=Y5~&h3Dn_9eB(RJZ7Iw zS!~UA#t_n#rY+?_MvS zdB9dkht=?2i)q(Q)Q9@fAbNzJpr>ezdac#;!~Wx@=jfGQd#mTNj9BUe4bd~Fopj1k zXr4#QD4u#;NfnM2Y^NW=Wj2A<^lVoz!W*o~?WlFsJAx{UIx=&D(!N|t>Mm)Qu9bwP z-jXXcQW=T#N;iP!{NmJ{i_W;S58ML}0b@9l8wD!plP1A^=_UB-Tz9hE8!!zt@JQO) zVF7#xDZz=)o+ISXgwl7NL~I4 z!w&df3TMDs=@P8v>S&Lvf@z`Su3F}SnsgBc3;kPTb3c)%@T5GAHT+KgBroA5IU#SzSxTYu13Q#e{0Aqo z$+Oc_;3@VvJXM|=rAcX5u%}yXJkgk8Y$C3WX8P2Z&qqiPxlQ`X5E&*>GD^nD3-X%0 zCo_aG%#j81jr<_1y8oqXoowo-kvH>EGqg_gnoo|Mk}Et+#aU>I~`( zr=db%o{hD}R8E{_m=Sfe@vbp@ArKF&2XaKa=oN$X;n~3*ZDLsL5eLOFF(&Hc9;bH> VWYDioF7@5H^CsrATCS{K_y>?=9tQvb diff --git a/icu4c/source/data/in/nfkc_cf.nrm b/icu4c/source/data/in/nfkc_cf.nrm index c6b1e1a8351d810ca381af56fd0a272a91052a1a..275fcaa4f9b3266e04c41250b592f64dfee27ec3 100644 GIT binary patch delta 1401 zcmY+CeM}Q~7{~9o9#&f9r8n-b*D4g*I6zUv{px}4T8%(BkFG5u3hT_K_!0&zj)ob< zmt}~P5cLN-4G;!qL0zoz&`hT;Ffuo?Wxh-z&Sc6z+hTBks1XiydkVzNeDnF;^E}_@ zdG4jX(_*;OVmLd~<|twjwGKekoFYU8CZmn)7Pf-jHW>#=s_iJENv|y>p4R`3q;=Xv z>n0!;U`;f6ZsQivdXA|R`n%a~me$?i#HF010|*q8)A_jt_5Ibh23GL^-sZR z0BVi2-UQbcV6TxTJM9T@$2R*P60K*Nr>3x()|UdmLGMbqs%F1g;KG9KW(OyY!e*$K z165{PJ3spXGM8`xZUeWKtLB25lAt24g6-huji2O_$HPL@1n^}12?-_cLq8LzV{H~z z-c~Mhxylu;m+R*Wm7lm@xEtJUu2=~u<;oB@s*i!f=jgO1$oGzm@Vq;dT4-ATFp9Vw zRv*0QwkTTvBx>sEcSTWpBDkMH?Er9Sx_%VmHlPdqWbjWy90JY*-vYhBwW(fh&)iT% zjLuQ~3_gmt@H{_vm?r$Ek77jiB{$$lIKr`S2lPX#I2GAaDd|1-gKf zKnUmoE&;tUG_fxE4mJ!eNjF~}dnpnl#T%zHk}g?a@jO!eD|yxbdJjmp$jf7hnuLU; z_>`ABVf$@MrSiKas64RLDSuk(l@Uvm^4M}j8MCyLhf>Y1aBreosRV@`!d{?WXcR=X zSy-eZDvnvI=Bh!ZP_0v%h2uh}a7y5q*~}YE3Al)w&%DKi=igWB6-1pAE(v|YGG+y% zFcqqwSJg=XY(VJXZVBPJI(1NJfDKj5;b^s0ZBkCC?aH7~!QLYy*_mjV zB;>3_c5P!$rr9u%bX_}^`>Vm)=V8SJ@vbLDTqsJSTg(^z;zQ3f&kVAsz-DZG=S%Ul z*g?7q9J3dBhs71%5%H<$^Okw3)Xm=Q-Ue^8x6>P$+Cv@`#2fE>AA8?TouNH1cpCYE zfET|=#3}fi?UpSaFTh#YiF2@wOKr<>J}$(?IDj|ct+*No@eaHfH{d3WsAhZ|ci^w^ z*ASlrzQI@F?8gIeUdMy@9v;Sj;Yauh)-gp^uRd)r@)h~2ol~lDPN~*e>n!t?`N(c- znwDD}i{iSaGg6OqN$QgZq?^)RX_#zUvljJ`-Zh8NX5uf&jA+bm&TiHAm+XJdXtzy@ zSWEMy&2pvuv5ctgz;3x-Zj=wpU&!t93Hg+KR=yx#k)!?5eik$b^d=}{O5|ASkfGRL ztksm=Kq#Q)1>%jfuDX77-E`e|J#tOBOk~T(HZ$)|a?f*TyYt<}T3q?EnP|P{soY=o EAMPx# delta 1401 zcmY+Ce@NVB7{~MOE<`VS>SZRmoX(z3jEe1?-Fr=~+1)A3TFoQC9}I0CWoc7qL#!mk1lw7+ zpgf1o;GY7om8Ffz^ghH^3k^b}a9TJcv>RH6j)WF|R4CXQ7qYivLew^}v9&>>`?}Fx z;&E3Oc{Ej<63Vn0VNUo}@N0L3yTYPyPY7!bT9dXcJU2H%vGe(!w#mGE3f{LfGh(I9 zmIUH;JA)K$o=u?5&k|y9Pn%r{l(`SwFQL{4T-mD+LOcwNf&V7>;}A!INnje718(p3 z8t1d3bkdf9L`F;$IngO*h}q=KekZx?+($Zcy-dDXC?d+USF^b><%HO@=Or6y`8^h9T#Sl0#TjE_za9sl>v1A&t^>~! zt>z2BYKI@ePLmXm0Iq6>j}DdNKoF<`>cRDSir{kEJXthtZb3fm@fW~FBw5X`fXl$Q zz;{V-3BWiIh5TpW7l`MQXyQCH3mcXX<+C%JFGZ4+RAw(D`HJ%u&#$DulDGb^w@-1! z&uk)U2NG3Mb6)O*?e`q5+KQuHTXl44|2Vp}HOD3Gsbf&vbc~QEN?TW~*R8i|pGuw5 z1)y8%mt=iFdRs?S3U@>=)7v$_-lYvlSEVs&LK3)iE}N?c7f}V=J6vr3q~5I|%8;g| zd8vdeA4Ta#V{svCS;$;8hK~JianAdBIYVCY=g4o#itLl$l|%9q|1&>B&R4jY z{`W@Z33-%^Rk+iO0)NY;fi-zU4hCuiRPINCbAg_~K;T*+K6jF=R-`hkfv17GT*lb0 z*oZ&r!UtctaSp!iT5{#%1Gor#a4}YKjjI%wV?Pe#2Hc2G<1@G&cjAk<7hl4Nir}kw z6n~F@fY<=0@GP9S@B*BR_)q*0Kf;gkI^Mur7?H}#iL}aKW$+WvuG&1i>hN@U>VkDa z@`W?cIQBs@N*Pz8%A_)_%qt7ZlCq*aB2D3HG)d;dS5PwvRTa|xha-oFjozx>*O+wI zj)=3SOlekI)ixDT=YaESx7x1~bx0jiud5U4PwGu|Mzx3RAs&H-V^ diff --git a/icu4c/source/data/in/uts46.nrm b/icu4c/source/data/in/uts46.nrm index ee765dec403d717c4118aebc1094c6d165d85216..562a67b27a4c60d07903fd4273a9b3c9be993c0c 100644 GIT binary patch delta 1562 zcmZY64@?tR7y$4d9Ho?s+^gJOPe59btL0Asd$lNDkxfglwe5i#G=hd0l?|}|nc|F+ z;S}S)fG<<$)G20cPMyTVETW6zrtY7cQx~V3ahgmW*(P&N;Xt>qK<2XL%kSOy-uJ!t zdcDmzBRAiSYzvvqWi+9w0EFgP38ASR$g1cXIz+D+$X~_H<{boy$5?n6WsA{Gs!4iM zwde_z(2%MX@@!xS6ZHFFPb=L@ljY_@8nn#KTRDOMXHNpNhK+56A?Ru znaxx(3z=GGnWBuIN-!aMFO%8#E|b<54w>!;kNO^AhHoUk#BcL6$A0d)?D>$n;W^2i zX3jEqJQtWRn6H^DOqb`ONA+A|demNs@YpjH{W!yVf+W=i2HeIc;HTErFsTVoOw-|I zml{ZQrD5>r)*Ccx3RKs@+9uFEw7vuKJ>US;)1ZD3@}uB5I0-tzg`wVy1{^%^?s}b-&1PlV&f@sBc{uc1JU%08ad)midOR!> znnFA|Z?a(u>tjn;LL<{UUlX_8}6h42v^rBQERZ2^w25FnrB8``@O{y6958AyE z(jC)%({JFRsmFB6tKrUjWtuWx^4{?3r8{0EX*ewx$I%>POfrr({sL8*&N2$d@OwV? za&Sd5m(FE#6OAtzy+)0%)Hu^P$GBjiuJ_rbPkJ5PUdF|R_aR>qm*|t}MaJ|vUyjcr zIemFj5f`G1ae_BD!q4H9l`Nj?%_Np9TfFkH$X(`r%I&goq-lo#Zu*1tTK|)D4Ijf( zJjgtM_XpyFi9u`75xmY9 z zf?t>>%o658J|9#IOJJ-M*1-6>uu*6dwhOz37U4bN1EC#HeCbf){owsznqx4=I0hrr zk?H6Tb_a3yDiPu!k5-*m2DfUV~9t}U(H)){|z0KN_YSO delta 1562 zcmZXR4@?tR9LIaoqopl>?oRHm2MkMv11oBU2Sw#NbgsK=r9BBz;=nmv2%7{LF-VLd z(>TnSoAqVGg%SLdX`CStPqH{LibjnwMi*wBF~m7D4I@sCA=pE7zXEap^3CVH@9+J7 z@BQxH`+Owk^O2ao28W}9B@9&nVK}*#FjP;wTDFdDWSgh`b{uwe5G2mY5Mfj^7$2#p zS&Vvyh0qCuQO^dy2*|gP`f|%b*mIOU%945s#6C;3A^@%oD%M+i*dBIfSCxgtCo&v% zQr`ucJ-{}TQU3xezXHG0MB?8w(xD&eybCpM>&&hKt5IJ8{A?oi)m9QuIa5edueXv~ z+WdCHx{j;ls<~ROp4+NvnQI8Hk?rPkCy#NNlToSc6fizHjybW7xQ4sMioC1xu>1pe zM;_qLa_71G@@4KScbyyN#^h)6qn zas#P1*+{*`7X4gOZ?zfpd63-=YwbXIcD)097jOjf^C5p6d@s-s3;;vG<=NV6Hk_2R zSqm*V5StX?UU4LEXrmjr(kA*H-Kzl2~Ys*Ucd)F z0IUVp162tmuJz@?3wQaP@P?BRU-SiY;j$8q`e(pC;A`O9M7Sv6IM566Dd0!&Ly06V zdFL#gK*+AvGm*b_zw_Q2mt3s_bB7@O3sCpE6%EpQWx)!_)}%ka|k}Gao(W z7xN$HNs6QsX*=ztbLs!)Uzja%g>(t+r_1Tt^IQMbfR9@cw~?-;o9G>M8y$wC!HCM9 zdCiKr(zs6gJGz%XO<$m|{jb4II!3ueTa|k>qdcTjlt;8vc}BaG2%V=y8!l7cQr0T( zD<3O6mHo;O5{bS*jn0N+KJtjDJDm-QQR&fTvq8wyBs_z-U^-(FNzO> zPl7kZ7s13(YA8EY5V|eOI9!=*+8SyLb)zjwr($j%!wp0+c zcxX$QNW*1yZnC6EE>iJOU6FRaE;Pr~EKCdA8+=7}DP78x2!l&fS+`Uu6-%omSqe(! z(gtahR3p_%ht%A@BUEZ*_+uP$E pz=ztpY*)O)-VyJZH{vzDi4Uf)#5Z=wYWw$Wi6h3fSK+isNormalizedUTF8(result8, errorCode)); } +void +BasicNormalizerTest::TestComposeBoundaryAfter() { + IcuTestErrorCode errorCode(*this, "TestComposeBoundaryAfter"); + const Normalizer2 *nfkc = Normalizer2::getNFKCInstance(errorCode); + if(errorCode.logDataIfFailureAndReset("Normalizer2::getNFKCInstance() call failed")) { + return; + } + // U+02DA and U+FB2C do not have compose-boundaries-after. + UnicodeString s(u"\u02DA\u0339 \uFB2C\u05B6"); + UnicodeString expected(u" \u0339\u030A \u05E9\u05B6\u05BC\u05C1"); + UnicodeString result = nfkc->normalize(s, errorCode); + assertSuccess("nfkc", errorCode.get()); + assertEquals("nfkc", expected, result); + assertFalse("U+02DA boundary-after", nfkc->hasBoundaryAfter(0x2DA)); + assertFalse("U+FB2C boundary-after", nfkc->hasBoundaryAfter(0xFB2C)); +} + #endif /* #if !UCONFIG_NO_NORMALIZATION */ diff --git a/icu4c/source/test/intltest/tstnorm.h b/icu4c/source/test/intltest/tstnorm.h index 2891e8c98ee..db7edfbdf44 100644 --- a/icu4c/source/test/intltest/tstnorm.h +++ b/icu4c/source/test/intltest/tstnorm.h @@ -52,6 +52,7 @@ public: void TestLowMappingToEmpty_FCD(); void TestNormalizeIllFormedText(); void TestComposeJamoTBase(); + void TestComposeBoundaryAfter(); private: UnicodeString canonTests[24][3]; diff --git a/icu4c/source/tools/gennorm2/n2builder.cpp b/icu4c/source/tools/gennorm2/n2builder.cpp index b457fe216ae..d3aad1214ce 100644 --- a/icu4c/source/tools/gennorm2/n2builder.cpp +++ b/icu4c/source/tools/gennorm2/n2builder.cpp @@ -209,7 +209,8 @@ void Normalizer2DataBuilder::removeMapping(UChar32 c) { norms.mappingSet.add(c); } -UBool Normalizer2DataBuilder::mappingHasCompBoundaryAfter(const BuilderReorderingBuffer &buffer) const { +UBool Normalizer2DataBuilder::mappingHasCompBoundaryAfter(const BuilderReorderingBuffer &buffer, + Norm::MappingType mappingType) const { if(buffer.isEmpty()) { return FALSE; // Maps-to-empty-string is no boundary of any kind. } @@ -217,6 +218,15 @@ UBool Normalizer2DataBuilder::mappingHasCompBoundaryAfter(const BuilderReorderin if(lastStarterIndex<0) { return FALSE; // no starter } + const int32_t lastIndex=buffer.length()-1; + if(mappingType==Norm::ONE_WAY && lastStarterIndex1) { + // One-way mapping where after the last starter is at least one combining mark + // with a combining class greater than 1, + // which means that another combining mark can reorder before it. + // By contrast, in a round-trip mapping this does not prevent a boundary as long as + // the starter or composite does not combine-forward with a following combining mark. + return FALSE; + } UChar32 starter=buffer.charAt(lastStarterIndex); if(lastStarterIndex==0 && norms.combinesBack(starter)) { // The last starter is at the beginning of the mapping and combines backward. @@ -227,7 +237,7 @@ UBool Normalizer2DataBuilder::mappingHasCompBoundaryAfter(const BuilderReorderin 0