From 8a320205af070fe24cce9c3db222f8e9f46b1617 Mon Sep 17 00:00:00 2001 From: Markus Scherer Date: Fri, 13 Jan 2012 01:15:48 +0000 Subject: [PATCH] ICU-9039 simplify/optimize ucase.icu encoding: fixed trie bit for Case_Ignorable; ucase.icu formatVersion 3.0 X-SVN-Rev: 31204 --- icu4c/source/common/ucase.cpp | 13 +- icu4c/source/common/ucase.h | 41 +- icu4c/source/common/ucase_props_data.h | 918 +++++++++++------------ icu4c/source/data/in/ucase.icu | Bin 22828 -> 22824 bytes icu4c/source/tools/toolutil/swapimpl.cpp | 4 +- 5 files changed, 482 insertions(+), 494 deletions(-) diff --git a/icu4c/source/common/ucase.cpp b/icu4c/source/common/ucase.cpp index 3763e1e6514..96d3e5d131b 100644 --- a/icu4c/source/common/ucase.cpp +++ b/icu4c/source/common/ucase.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2004-2011, International Business Machines +* Copyright (C) 2004-2012, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -403,16 +403,7 @@ ucase_getType(const UCaseProps *csp, UChar32 c) { U_CAPI int32_t U_EXPORT2 ucase_getTypeOrIgnorable(const UCaseProps *csp, UChar32 c) { uint16_t props=UTRIE2_GET16(&csp->trie, c); - int32_t type=UCASE_GET_TYPE(props); - if(props&UCASE_EXCEPTION) { - const uint16_t *pe=GET_EXCEPTIONS(csp, props); - if(*pe&UCASE_EXC_CASE_IGNORABLE) { - type|=4; - } - } else if(type==UCASE_NONE && (props&UCASE_CASE_IGNORABLE)) { - type|=4; - } - return type; + return UCASE_GET_TYPE_AND_IGNORABLE(props); } /** @return UCASE_NO_DOT, UCASE_SOFT_DOTTED, UCASE_ABOVE, UCASE_OTHER_ACCENT */ diff --git a/icu4c/source/common/ucase.h b/icu4c/source/common/ucase.h index a2a93cc2cb6..75594724687 100644 --- a/icu4c/source/common/ucase.h +++ b/icu4c/source/common/ucase.h @@ -278,37 +278,36 @@ enum { }; #define UCASE_GET_TYPE(props) ((props)&UCASE_TYPE_MASK) +#define UCASE_GET_TYPE_AND_IGNORABLE(props) ((props)&7) -#define UCASE_SENSITIVE 4 -#define UCASE_EXCEPTION 8 +#define UCASE_IGNORABLE 4 +#define UCASE_SENSITIVE 8 +#define UCASE_EXCEPTION 0x10 -#define UCASE_DOT_MASK 0x30 +#define UCASE_DOT_MASK 0x60 enum { UCASE_NO_DOT=0, /* normal characters with cc=0 */ - UCASE_SOFT_DOTTED=0x10, /* soft-dotted characters with cc=0 */ - UCASE_ABOVE=0x20, /* "above" accents with cc=230 */ - UCASE_OTHER_ACCENT=0x30 /* other accent character (0>UCASE_DELTA_SHIFT) #else -# define UCASE_GET_DELTA(props) (int16_t)(((props)&0x8000) ? (((props)>>UCASE_DELTA_SHIFT)|0xfc00) : ((uint16_t)(props)>>UCASE_DELTA_SHIFT)) +# define UCASE_GET_DELTA(props) (int16_t)(((props)&0x8000) ? (((props)>>UCASE_DELTA_SHIFT)|0xfe00) : ((uint16_t)(props)>>UCASE_DELTA_SHIFT)) #endif -/* case-ignorable uses one of the delta bits, see genprops/casepropsbuilder.cpp */ -#define UCASE_CASE_IGNORABLE 0x40 - -/* exception: bits 15..4 are an unsigned 12-bit index into the exceptions array */ -#define UCASE_EXC_SHIFT 4 -#define UCASE_EXC_MASK 0xfff0 -#define UCASE_MAX_EXCEPTIONS 0x1000 +/* exception: bits 15..5 are an unsigned 11-bit index into the exceptions array */ +#define UCASE_EXC_SHIFT 5 +#define UCASE_EXC_MASK 0xffe0 +#define UCASE_MAX_EXCEPTIONS ((UCASE_EXC_MASK>>UCASE_EXC_SHIFT)+1) /* definitions for 16-bit main exceptions word ------------------------------ */ @@ -328,12 +327,10 @@ enum { /* each slot is 2 uint16_t instead of 1 */ #define UCASE_EXC_DOUBLE_SLOTS 0x100 -/* reserved: exception bits 10..9 */ - -#define UCASE_EXC_CASE_IGNORABLE 0x800 +/* reserved: exception bits 11..9 */ /* UCASE_EXC_DOT_MASK=UCASE_DOT_MASK<xYZh?k`~33mckVs^ zIp>~p?tS;&`%;ypH|-yA67l7t>&q`2uY>bwLLjOl)V>hWD;E)+;wHM)p|h>BbL?yM z^joJ81q&YaEYfxs?9x7P#a(}N{oVBs*Y_^3-bwGOTY7(exc*c99DRa*i9SWYTCdcr z^*Q?O`h0zfexLq;{;0l1e?i}&@6q?^|E3?*59y!jjrzAb89u{fbTqmcJ&ZoaU}LmV zW{fi~G$tEUjYikEu4{}NjA~fA1u`ewb*buQ<>l_h+bgOS${7`>0#UqiYs^<;QyPl8H_Ya=Gp#H-1 zjpsX$+uOl=vbU$V`LU#0^!E1m4sCU>R3EKJKk=4&&-RY@cE!r=s$cBA-1|%9a9tvg z8EAK<%x8Lpca2+xZH=hIiGJx;?YMmg+Gn7B2HIz!eFlD9GY~nUC!Tx{an8_k`0uK( z^*$uxJnr4*eHmx<4(}fCK1km4{@(kyG~**k<5o$sO-ppDBLo%Rj=PZ=!Efvd>5OY5z)J1@_Z> z#jbl}Qj_0HZtb3eJp~5~PA$B;a9!c2h21(#>ae22`yGlpPU^U#FiS?Rae8}hb`GZ zv2Lr>Na>i0Y*nhjOs zN|2*b`4|+%*9HcP(o2m!4^6tw`-^Ig2E&OZPigab#IWa~)MN9xI7*w{$XRt1`Tj&w zgU9yQFeU1xqwCPgUz;zd6Pq z^LHxoPKx? zRM{`1Z|ziSCi%_-|HUKA(}6`O0#s78@!1hu&yT)pze65w?jMT_Ib z8f{)1xnH9|XQ^jC)H$+-ktu4`5B9>wTrG5>mR#fyk<};%ZOh0izC$*aTuFu`-GZ?j zrJtyoz` zo{`8*#kMV!M7FlCbWW{7o)_l%`Erw4&P6?I+_K)^1vZ_zU_b4RXVR*(qFww&Whe74 z+aT()Q}&-y+tpvx1maTLt*IMPZGibEQP*~7-uxmW%8~A7WAs$scLP1Sj`l3=74Ky> zhAg=k$E{HBc<<5@Mln&B_7R#x5fr}>>l5hf!gtj!iuDbgW*$@HX=z(?TKu$#UP5L4 z;{D_GW@FTFzx=-CSQDrK09+S^yQ= z$~>H&*-~bis}GNgud)HP0~!W24JaE}JFsD3)4;MpwSyW4H4Q2oTsyd7aMR$jA+3huVk%h z<=NYk9z62qMw{+O&X$^6*=E|XB-MsdWn%8s9Lj#d^zIIv-M_;ijg|K%P1r)%x|vAL zj8W#5D&x1H*2PdiU1fUCFX6Z{KUK?BE%Q+_)jEB#Os*fhW_@MSpVPWEJda#Ol=<1c z8CTe_4IIy`oiTH{F3#b2FEtMK!+b?6?`vhEpUi{vksd4Z%z}!kTJGEWr&mRiUS$0I zwVYL60d`@TLOj!om#y;0qqHx!%XVe^thY5X`*Yj2bNv#ZDBB)#XW*vNQBqqYTGdDS zP_2?T*XE9tsVPZ*Qf_-~*{aW+dA?UcL8`Vrv4*9x*z#;#?$6m$zkkSO*D$JdbS_;> z<+0JR(_=r0oe}$K>}Nb{-OtSRT}sPx;<%=fTKvaSi&M+9>(nxK>X~=ynQx6goB!CE zrV%SOjldYws5#5TCgMNlq-Op1GyCC-cUeZ`3;3}pXQG^Ip3Cv#XUEQoo?A0ER9-qR zaNd#g4g|N4Uq4~)KV5bHmfMX`y|i)$_k{k(Ki;L;wZ`)cm435Vtph3*N>_wPj6<=?gaUe@QjWZ!HS z=`!g<)v8}EzqVXiI&-jPEXCIkIWPR7T{D-xbEI?4bWEt#&Y<=Jdyi0A2F1NI#O@Ik zJB+kZmd(d;B~2u!iSKP(V)v8Z;qdo}%1TPOc3zC?7f7q_3+oq3z4YYo$iCDiIo>x? zeaLtvRH6*Y{3^-jWc${C_rg}+1d#XQ`6X*Q6AX9Uo>@5{i67yK*V{1OpVk>_D8zc$+JP#>Xhw=cYvvN^>wO;y*_x5N-9@Si!O^V3oH*Uk1mfd53C5Sh^~mQ2&@dP zjINBY46F*Rimr;U3ak#Tj;@Zc4%~;G38b#5gIBU#t#kLtW>YCnc=ZgorOobDKI8H| zR_zDQT%3G157}hwQ~AoKZ<$PbXqm0F%Aad~tsO_LoNO)se5dltb`SV0%hoTS%#O@l zFcoG&95>&kX8w5EeEIV8mrt*BZn68Tr`6eO|9#5F*~)xb(jC9AVn4yT#_H^=_^E4h zS?8AXshYZX@LiNwWwM)+Bn3GcyH8G&tcq;wDP5MeqeE!lbNO65RL!ty)zE;sn15bwq;m)&lRIBzx5;Ivd7Hd+s*U4K*`_mP0u@5Zs~T6?Vplc&N<(_r?=?&7bRX$h`Am(-cRk%QU6L8GrT547WOF=rIew>1W~FjeZN50^ zS?9vaSFSc!KP7ke%QjoeE_a(be@>s-`P4qSedp4o_Rk(eNmFfec~o-QE4@$pPa-^1 z*{}4SE*)p@l_@RXIUVb0q&7;^#?GK}nu#4_a7j7MMtheQoOD4#8>EYbCP`Xk$MoZR{7c6R3Be+30%v&R`! zs9%^MlMZWN!RD~`CE7ybTBB)0HL4nF(4C;WKsSSCfo=gUecYfYLDzx52~;_hDkhAK z=}}#yS1lgt$kSuGPuC!^>yNB*dJY!vV{TqV`wiMxbQgV~e+@1MZoK{~Y#N|zgk%TW z7cuWU(S8EF3+Vbl$JJ?dThM&!!lU8jEl{Gz@_=Q}YC{|W63_hisKH`e$ttbULS zq=O8Q8{`3bK|YWP@`DOMg`f_gj-XDUB2Y1?GpGybB+$vAQ$Sro-9Q0QcTf+|si2;q zULXt98&m@71L_Mp4b%@b05oivL3iM;Uq7^*uI;7KdaU0H(egNMq*40$31U2leEO6G zG5!tt^W$Stn^?bB@1j4Jpu4fY7?Y5wT1hn_ zRkf0i6H-;rr$tt87g<=PR#cx)5g-l76sQi!1>_fKF^~>aAkY#Z1E^4-r9f_=4gxI$ z@&I)dXgQD(CD2x&DxjML+6Ht3(9J+$ zdKgH>3)4m*6)#M`1yb?C^azlO7p6ynRJ<@f2BhMJX%mo&7pBL7R6Jhsw;*-{VmDah zu|ju1`>Y<*S7J|P?H2G0z%S6p3;e7;4mwG^<{7i~IM{GK9%Fdk;bQ`JT;8u`?>UI| z4r09{V(o+7%jmr}5o;^>2>3`M*30m>HbESVFX*flmLh-Yyx{5+N*<&P!oU)F4OJ7%W| zv(qGI=NLxzHpaUd*|7Fq@XNt3Pt4BS=({;V?2|EDbM{>G>>PIZkSEw?oDTekaS={D zeskbAgPAUk?&t|}8B_*32l!IZuR)VQlR@W##zOaSkwF_lzXd%4dKB~+XcOph&=a7| zpeI33fwq9225kjx13d$J7W5qGdC&`>7eOz9UIx7aih&wH+d(@({{ngy^crXp z&~DHk&|c7+pnomWs0QPhhFq&$JZDu@MH57+61#g9-gPG6oo51Csjmh<5mXI4@^rRm z3u(i=lCqt&tb#4yeSXWDgLkYcpgH6@nkjQ1S3A;;Re@W7Q7IbHWX1OoGXXUzhh2(P#-`DmcWL(9Pqmr9WdV;GaLvZ+Y0n!9l z4bBCQ&rqWK(RVs|!Hv|2_)U$d5gea{gfnqNFCHKMX7Q-!D4()KYte(xSE6;`_+%xD zg5&o_q6^TMcMGByEFylEiS~fw9fIhk1Q*j^794Azw{}?kOcT8UnS8q66dY@JTl?@l z@DukTWXx&$3jHG?8>jzX$hhw(;P`nX`ZKt_qQ`$GI8A>>|C7bCar)ncjO`{VPM@6M zG<`~f8>e3(IQINkaQsve{ZE36>HiQM=XcmTX7Mvg^fhGsenqqpGza&U=p@YZ9NbZ& zlLa>mTvx%}2CfS@zR&nizu*XCBAncdEKP7>D`L3>HxGXYstfKOYq4brZoaj|atrQW zYpLZC+yZNv#@`#s@4u3kj#< zgBG8ISR2*`i_bvZbL%0C&m+Rg^n}G{5#dzYZ1FinIFX*T_zWVPMo(FM{t!;0Ef$|W pgj48gi_aax3AEMXGly{cY_s^pA$kt8#^(Xi>)?3rBl-&5{{k#JI~V`} literal 22828 zcmeHP33OCdn*LwCrK(6O3nU~eKq3A2$vMP_k1n5hVW|(2r3r=Tve2hrO!VS3qI{F+8kf!SN-XJ6 zkFL7rBBGMChpk(+Eoob|w|#%~eeCK3->Lsrzh8e?U#~x_Z`QZ#yY&CkKh*c>pXu%T0iBG1VHp`lKjQ>ruyL9( z+BnOYWK1}##-ZXSY><4*kt_P*k-(L zylK2;d|>P|J_Bzzz7f0w_)SAIgXTUX!)*8U2etcZjRN3ypJtvUc)O3|*fLDmN10~| z{!_CAe2O^({m;$W=3KK{=&vzje=f8rnSFZJi)%FWX&{6F*0!Wb@#j^j%3tugBv zU*Qeo8sU4r=p)g%c&}dEy8^u{(7OV?E6}?FKdu$H?U?R(@;$`8Ld%dpPrui{TGV;e zztR5!_Ug_4?fzZRyy<`6|7qtt*B<|0plkDgjee8yonH%ZCVwC^&<|36pfGTX&^Y-= z#$m_OEA_5G?+Wy;z>jSO&ggvNof8O(PkqT}SQ0odFtan}U3}XA=RhT%r?uj#dqt-u zc@}T$_O$J3AEpgRza)J{`UmOxp_!rOp|?U=88b7MXS|h>l{qu>#>_35l$D=#Ue>Ky zf5-}CPs!erc0=})+3nfseZze}4rcc`zt62&*LF)nE&FZvv~L{KAHwEgBB0Bl#+4`ojG;lX61g({#&YHkuf(NY)f#;8D<;}-5 zTlX__JYMyvyfg4l;Db)vzD~)BRMKaG_P{rRg8@C57VH=NJa9sANN`wibg(VJmj8og zf8eiyvtXGNoEp3!cyX{aSP}eL@RHz^;G7_@@OtE?^5Zf|nHZ7)DT~i#Exj0Xl%mcBlgK`g4XtBEbI4tA;5qh43{3(GYCV7OZ z73#3ixPNMJ*5#1q$W~y2Z??~krHtP&Vpg}t=d8;>-I2W{id)^%f--78d@O0U?&Mpn z?>^k1x9RTu8o7~M9&fnOv6fptc4vIqXfCcBt2A|Egfym1aE)5})s1c$Ep;tpO7mOB zmI>eHQ(zlgHm`(O66uH{qfOSkPvAHS(e?BE&X@LTzA!h`aol#_VwCb8a3}~Mqi5?rW7O_Yk<+W2HFA|?S8%Vb&Wzn zX=S-6*u0Zbt>~qm`wHPxyF=JSFQw-F%{!WRl0lX+nqs1iX1&_u0F!H$RjQ`dnG*(R5hf9>xH8uQ?(rr zrIFmp6)&l4kk^G}zQ3x1wT0YRRtMYba3;!v-0(cb55X;&EtzdWE6~)3pRR{QKOt>t z+MYBkP>?R4xUWK#)iC-yI*%^&mtxS5>7lUnj!W_bLwP;L_djG&Id9eToVmI> z97$=CbCoA7R#PG+E6&;@JLI*kVmXgIN3xgaQeFRSb?nN6E6deYS&oc{5$0TtGNaS0 zoXSIu!@DwDypLoy?5Kk&)!FRggpbh)=A{*j(4wyv)(F-@43WBYz{B4uCei+ zPW`UFT{W_*TIJZMhV*8edQzy|UGv0iy5^;r>%;M9Pj`OD(sRjsRlRcXswwjFaqGDP zuY+1C_nL5~aP%2Uf;*9pzgv=P;nt)oI~9}^e;p`&a<1ySM)qS+?hFNce9pJP5t)tJJhcrXF|^GoW(f}IZZixat`GbR*sIA#Zlx;=G2uro26Qhw=*YC*;r0Ct3_rM}tdNx>UH_ zdwUYWGw*J;@pQ40YJs)U748m53b~qpnA+ zj^Bb(tHEoNY6Oph>&o$|oa@M5%^3E|u#v-84-Xa9 z6txx28qqpp{K&?U1B;gxA1=OX)ZS4uM!zz8617gOm}Kp<_SK;sp2T*qL&hfZh}X#+ zs#kGdc^$p-l#x>J=vaF_epT^0s*>1bjPvA(m%MVu>gDso*=?O?gc@aK^b9yp80n)t zWAi0KzD}i-G?KsE@V%`h{&$D+kQ(mY2cw1OO0Ny)?mt)BWxVD0A(J|!q;e}QN3t#( zO&#WUO7i9w{$*jZW`3W%$;|u&@~+XHrOxWU)qdA~>#mO5p89Vgi6ZdaJgQ$qE$R#a9|R$W$K)>zhB)>ampTr|02a`oi;$&Hg+C$~)wO(~jEF{OG+{glQjty9{j zgvyJ`E6S_O>&qLsYHd7Iw!X}q3X&>#0hU-YFvrdonTEAmrUnWW~nk3CO}*_ z*&)YwbbYe%^1YYLu54Z3$(3d4R3*8&95HJ)Oyzr7*}Uee>~8)>3U{2!%e!jqvxCo~ zJe*HNN)t6CVNOgErv#=%*HgSML0F8+IqUMyjxi<$PNZAv`KNq2YZc3-!qIxgQJ$D7 zuf$fVRau$y5zmD(t_#Ouj_TRbQ@={227d3n7BvImPPUdm4 zCH9Ih);6_zTHCbh^9EM5R#Z=qw<;W(Q9mOzvwmjie5GODvkH7Jx^FUXSh55?ud&k- zIP^dj7Cy4?>fV#T@`?A6SvVs;(TKluO0AC{1!wB%A=gxSTYZkMnct??R9$bb#>d<_ z!s2bA(oytK;ycf|-W;ii;R&z6)U&8+a&BP()wm>**QoM4Qm*oIPDkdoJCZTGGf27T zPx|CDkmF+?ca7q1N9p!XEIW1iZI$P(X3ki(_*m)Z^qj?cYB)KZ?8zh*nasy+JDPLv zd`j-Qq&^!d&Yy*q?8TF1kBuSm**aFIKHn$H($yB9qi5sfPxwBuysCG&w8U3Zb>zQ5 zklzisbxL;g_#E+A`Nme{Va0pKYZCn>b#Rs(;j=$3d4tUI>~qoC1_jSCpN(NnfdJ)MWE) zi^puz1G)x{)4yv^q^IHW7S`uE@Nd9h)P3}}{xzgckV^F?_`C#LJ2acYpTpX30e=s8 zEBF`s#PBxoSAko=cZ2T$A38`@Z5H(&arJ+)OXx0myrq}W-QeGVuZUJHp_S39CA12* zcG&I*e-8g1<3aHEfY*S3p~e+cLypjW}yg6{@j2R?L=g?ZhDzwCZdUjh6zyj%3O z=(p$(qJK$Wg?_8P24m&v{~Ovk^8(OKCa&`$+yNjTNCz1p6J&w> zpa3Wc>H|sxrGrAC3{WO03zQA&3+e~T0p)`FgYrQ6pyNQtgH8Yq01X5cfNW49Xb@;H zXb9*;&`F?^K}AId)!@uuK71lwTA<$QDS^4HTfn+$wL0hf(Y%ULR|6J65E^ge)=O(x)IkGV-gzGE3GE9s#n@cLaX{E zbgNzHBO6z#y;5I7w*hHDL4j@u@&WY`=nfzqC{3U{fefH@f$D)wppZaIfh?d5ftCUJ zfii(2bR&?e7oiA{su!V~fK)~w1zH6(80a*C?gJVEG)$oTfldS(F3N{V8-V5kEfi=Y&@X|m0*cUsK&oDZ z)&Qw`5&9jFsu!V$fKU( z?SkJ6h`u*kYXjulAm0|P^#bzU8zrvAn7z3c_grgROb)pp5bOcmwX*lxCzgx(Y!cZx z<0N1i&nO`D+{-(g^YQau^U@ux?ZAB3VKi)g1M)J+%c85Z1F`F(#5ozWH;9J0yNW5IrB2Ht8=2iLKArn{B?Q@oyfXlXv%PLSpMSdl&8pzT@73 zjwMZBuD>gEll1q6j$_}0#P=J~Cy<&&#Q#JkO@C7Vlg+wG`lmw2elwM%Ul5fv{i3Ke zN&mT!IP+g1@m)pqZB*K%|4m3dzOU@>ZN4XozJ`w9uZV5|Ey8&v%E3A>!WkvX6;d^% zJRw~NsUIXh&qVzp5#~hLxo@>KAw}%lY@d*BvTwI_A>C}>VH-kPV&7?-LaMdvZA(aX z_EOs~q+4vf4+v?wy~5_bk+4gyw0U17?2xN$-V+JC<9#;ohlHK+ew+6~!mjv$&HEs( z4ZFeSJrL*IUTyP!MA(@gvw1Hf>`Low-iHV~(&IMoL4@7t37hvH!cMf_=Dmlo3pLuj j?+|vN4L0vNgxzPO%{va!(^xg$4~Slc#LqsWFCqOuxib(> diff --git a/icu4c/source/tools/toolutil/swapimpl.cpp b/icu4c/source/tools/toolutil/swapimpl.cpp index ae911e0f56f..bdccb950474 100644 --- a/icu4c/source/tools/toolutil/swapimpl.cpp +++ b/icu4c/source/tools/toolutil/swapimpl.cpp @@ -1,7 +1,7 @@ /* ******************************************************************************* * -* Copyright (C) 2005-2011, International Business Machines +* Copyright (C) 2005-2012, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* @@ -334,7 +334,7 @@ ucase_swap(const UDataSwapper *ds, ((pInfo->formatVersion[0]==1 && pInfo->formatVersion[2]==UTRIE_SHIFT && pInfo->formatVersion[3]==UTRIE_INDEX_SHIFT) || - pInfo->formatVersion[0]==2) + pInfo->formatVersion[0]==2 || pInfo->formatVersion[0]==3) )) { udata_printError(ds, "ucase_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as case mapping data\n", pInfo->dataFormat[0], pInfo->dataFormat[1], -- 2.40.0