From fa0e88725c5f6d682d8b84b470e6d06c4b973df8 Mon Sep 17 00:00:00 2001 From: Pierre Joye Date: Wed, 19 Nov 2008 17:01:17 +0000 Subject: [PATCH] - #46497, wddx_serialize treats input as ISO-8859-1 (Mark Karpeles) --- ext/wddx/tests/bug37569.phpt | 776 ++++++++++++++++++----------------- ext/wddx/tests/bug37587.phpt | 6 +- ext/wddx/wddx.c | 45 +- 3 files changed, 407 insertions(+), 420 deletions(-) diff --git a/ext/wddx/tests/bug37569.phpt b/ext/wddx/tests/bug37569.phpt index f7422c9ec2..45cd68a80b 100755 --- a/ext/wddx/tests/bug37569.phpt +++ b/ext/wddx/tests/bug37569.phpt @@ -5,774 +5,780 @@ Bug #37569 (WDDX incorrectly encodes high-ascii characters) --FILE-- = 0xc0) { + $v = chr(0xc3) . chr($i - 64); + } elseif ($i >= 0x80) { + $v = chr(0xc2) . chr($i); + } else { + $v = chr($i); // make it UTF-8 + } $ret = wddx_serialize_value($v); echo $ret . "\n"; - var_dump(ord($v), ord(wddx_deserialize($ret)), $v == wddx_deserialize($ret)); + var_dump(bin2hex($v), bin2hex(wddx_deserialize($ret)), $v == wddx_deserialize($ret)); } ?> --EXPECT--
A -int(65) -int(65) +string(2) "41" +string(2) "41" bool(true)
B -int(66) -int(66) +string(2) "42" +string(2) "42" bool(true)
C -int(67) -int(67) +string(2) "43" +string(2) "43" bool(true)
D -int(68) -int(68) +string(2) "44" +string(2) "44" bool(true)
E -int(69) -int(69) +string(2) "45" +string(2) "45" bool(true)
F -int(70) -int(70) +string(2) "46" +string(2) "46" bool(true)
G -int(71) -int(71) +string(2) "47" +string(2) "47" bool(true)
H -int(72) -int(72) +string(2) "48" +string(2) "48" bool(true)
I -int(73) -int(73) +string(2) "49" +string(2) "49" bool(true)
J -int(74) -int(74) +string(2) "4a" +string(2) "4a" bool(true)
K -int(75) -int(75) +string(2) "4b" +string(2) "4b" bool(true)
L -int(76) -int(76) +string(2) "4c" +string(2) "4c" bool(true)
M -int(77) -int(77) +string(2) "4d" +string(2) "4d" bool(true)
N -int(78) -int(78) +string(2) "4e" +string(2) "4e" bool(true)
O -int(79) -int(79) +string(2) "4f" +string(2) "4f" bool(true)
P -int(80) -int(80) +string(2) "50" +string(2) "50" bool(true)
Q -int(81) -int(81) +string(2) "51" +string(2) "51" bool(true)
R -int(82) -int(82) +string(2) "52" +string(2) "52" bool(true)
S -int(83) -int(83) +string(2) "53" +string(2) "53" bool(true)
T -int(84) -int(84) +string(2) "54" +string(2) "54" bool(true)
U -int(85) -int(85) +string(2) "55" +string(2) "55" bool(true)
V -int(86) -int(86) +string(2) "56" +string(2) "56" bool(true)
W -int(87) -int(87) +string(2) "57" +string(2) "57" bool(true)
X -int(88) -int(88) +string(2) "58" +string(2) "58" bool(true)
Y -int(89) -int(89) +string(2) "59" +string(2) "59" bool(true)
Z -int(90) -int(90) +string(2) "5a" +string(2) "5a" bool(true)
[ -int(91) -int(91) +string(2) "5b" +string(2) "5b" bool(true)
\ -int(92) -int(92) +string(2) "5c" +string(2) "5c" bool(true)
] -int(93) -int(93) +string(2) "5d" +string(2) "5d" bool(true)
^ -int(94) -int(94) +string(2) "5e" +string(2) "5e" bool(true)
_ -int(95) -int(95) +string(2) "5f" +string(2) "5f" bool(true)
` -int(96) -int(96) +string(2) "60" +string(2) "60" bool(true)
a -int(97) -int(97) +string(2) "61" +string(2) "61" bool(true)
b -int(98) -int(98) +string(2) "62" +string(2) "62" bool(true)
c -int(99) -int(99) +string(2) "63" +string(2) "63" bool(true)
d -int(100) -int(100) +string(2) "64" +string(2) "64" bool(true)
e -int(101) -int(101) +string(2) "65" +string(2) "65" bool(true)
f -int(102) -int(102) +string(2) "66" +string(2) "66" bool(true)
g -int(103) -int(103) +string(2) "67" +string(2) "67" bool(true)
h -int(104) -int(104) +string(2) "68" +string(2) "68" bool(true)
i -int(105) -int(105) +string(2) "69" +string(2) "69" bool(true)
j -int(106) -int(106) +string(2) "6a" +string(2) "6a" bool(true)
k -int(107) -int(107) +string(2) "6b" +string(2) "6b" bool(true)
l -int(108) -int(108) +string(2) "6c" +string(2) "6c" bool(true)
m -int(109) -int(109) +string(2) "6d" +string(2) "6d" bool(true)
n -int(110) -int(110) +string(2) "6e" +string(2) "6e" bool(true)
o -int(111) -int(111) +string(2) "6f" +string(2) "6f" bool(true)
p -int(112) -int(112) +string(2) "70" +string(2) "70" bool(true)
q -int(113) -int(113) +string(2) "71" +string(2) "71" bool(true)
r -int(114) -int(114) +string(2) "72" +string(2) "72" bool(true)
s -int(115) -int(115) +string(2) "73" +string(2) "73" bool(true)
t -int(116) -int(116) +string(2) "74" +string(2) "74" bool(true)
u -int(117) -int(117) +string(2) "75" +string(2) "75" bool(true)
v -int(118) -int(118) +string(2) "76" +string(2) "76" bool(true)
w -int(119) -int(119) +string(2) "77" +string(2) "77" bool(true)
x -int(120) -int(120) +string(2) "78" +string(2) "78" bool(true)
y -int(121) -int(121) +string(2) "79" +string(2) "79" bool(true)
z -int(122) -int(122) +string(2) "7a" +string(2) "7a" bool(true)
{ -int(123) -int(123) +string(2) "7b" +string(2) "7b" bool(true)
| -int(124) -int(124) +string(2) "7c" +string(2) "7c" bool(true)
} -int(125) -int(125) +string(2) "7d" +string(2) "7d" bool(true)
~ -int(126) -int(126) +string(2) "7e" +string(2) "7e" bool(true)
 -int(127) -int(127) +string(2) "7f" +string(2) "7f" bool(true)
€ -int(128) -int(128) +string(4) "c280" +string(4) "c280" bool(true)
 -int(129) -int(129) +string(4) "c281" +string(4) "c281" bool(true)
‚ -int(130) -int(130) +string(4) "c282" +string(4) "c282" bool(true)
ƒ -int(131) -int(131) +string(4) "c283" +string(4) "c283" bool(true)
„ -int(132) -int(132) +string(4) "c284" +string(4) "c284" bool(true)
… -int(133) -int(133) +string(4) "c285" +string(4) "c285" bool(true)
† -int(134) -int(134) +string(4) "c286" +string(4) "c286" bool(true)
‡ -int(135) -int(135) +string(4) "c287" +string(4) "c287" bool(true)
ˆ -int(136) -int(136) +string(4) "c288" +string(4) "c288" bool(true)
‰ -int(137) -int(137) +string(4) "c289" +string(4) "c289" bool(true)
Š -int(138) -int(138) +string(4) "c28a" +string(4) "c28a" bool(true)
‹ -int(139) -int(139) +string(4) "c28b" +string(4) "c28b" bool(true)
Œ -int(140) -int(140) +string(4) "c28c" +string(4) "c28c" bool(true)
 -int(141) -int(141) +string(4) "c28d" +string(4) "c28d" bool(true)
Ž -int(142) -int(142) +string(4) "c28e" +string(4) "c28e" bool(true)
 -int(143) -int(143) +string(4) "c28f" +string(4) "c28f" bool(true)
 -int(144) -int(144) +string(4) "c290" +string(4) "c290" bool(true)
‘ -int(145) -int(145) +string(4) "c291" +string(4) "c291" bool(true)
’ -int(146) -int(146) +string(4) "c292" +string(4) "c292" bool(true)
“ -int(147) -int(147) +string(4) "c293" +string(4) "c293" bool(true)
” -int(148) -int(148) +string(4) "c294" +string(4) "c294" bool(true)
• -int(149) -int(149) +string(4) "c295" +string(4) "c295" bool(true)
– -int(150) -int(150) +string(4) "c296" +string(4) "c296" bool(true)
— -int(151) -int(151) +string(4) "c297" +string(4) "c297" bool(true)
˜ -int(152) -int(152) +string(4) "c298" +string(4) "c298" bool(true)
™ -int(153) -int(153) +string(4) "c299" +string(4) "c299" bool(true)
š -int(154) -int(154) +string(4) "c29a" +string(4) "c29a" bool(true)
› -int(155) -int(155) +string(4) "c29b" +string(4) "c29b" bool(true)
œ -int(156) -int(156) +string(4) "c29c" +string(4) "c29c" bool(true)
 -int(157) -int(157) +string(4) "c29d" +string(4) "c29d" bool(true)
ž -int(158) -int(158) +string(4) "c29e" +string(4) "c29e" bool(true)
Ÿ -int(159) -int(159) +string(4) "c29f" +string(4) "c29f" bool(true)
  -int(160) -int(160) +string(4) "c2a0" +string(4) "c2a0" bool(true)
¡ -int(161) -int(161) +string(4) "c2a1" +string(4) "c2a1" bool(true)
¢ -int(162) -int(162) +string(4) "c2a2" +string(4) "c2a2" bool(true)
£ -int(163) -int(163) +string(4) "c2a3" +string(4) "c2a3" bool(true)
¤ -int(164) -int(164) +string(4) "c2a4" +string(4) "c2a4" bool(true)
Â¥ -int(165) -int(165) +string(4) "c2a5" +string(4) "c2a5" bool(true)
¦ -int(166) -int(166) +string(4) "c2a6" +string(4) "c2a6" bool(true)
§ -int(167) -int(167) +string(4) "c2a7" +string(4) "c2a7" bool(true)
¨ -int(168) -int(168) +string(4) "c2a8" +string(4) "c2a8" bool(true)
© -int(169) -int(169) +string(4) "c2a9" +string(4) "c2a9" bool(true)
ª -int(170) -int(170) +string(4) "c2aa" +string(4) "c2aa" bool(true)
« -int(171) -int(171) +string(4) "c2ab" +string(4) "c2ab" bool(true)
¬ -int(172) -int(172) +string(4) "c2ac" +string(4) "c2ac" bool(true)
­ -int(173) -int(173) +string(4) "c2ad" +string(4) "c2ad" bool(true)
® -int(174) -int(174) +string(4) "c2ae" +string(4) "c2ae" bool(true)
¯ -int(175) -int(175) +string(4) "c2af" +string(4) "c2af" bool(true)
° -int(176) -int(176) +string(4) "c2b0" +string(4) "c2b0" bool(true)
± -int(177) -int(177) +string(4) "c2b1" +string(4) "c2b1" bool(true)
² -int(178) -int(178) +string(4) "c2b2" +string(4) "c2b2" bool(true)
³ -int(179) -int(179) +string(4) "c2b3" +string(4) "c2b3" bool(true)
´ -int(180) -int(180) +string(4) "c2b4" +string(4) "c2b4" bool(true)
µ -int(181) -int(181) +string(4) "c2b5" +string(4) "c2b5" bool(true)
¶ -int(182) -int(182) +string(4) "c2b6" +string(4) "c2b6" bool(true)
· -int(183) -int(183) +string(4) "c2b7" +string(4) "c2b7" bool(true)
¸ -int(184) -int(184) +string(4) "c2b8" +string(4) "c2b8" bool(true)
¹ -int(185) -int(185) +string(4) "c2b9" +string(4) "c2b9" bool(true)
º -int(186) -int(186) +string(4) "c2ba" +string(4) "c2ba" bool(true)
» -int(187) -int(187) +string(4) "c2bb" +string(4) "c2bb" bool(true)
¼ -int(188) -int(188) +string(4) "c2bc" +string(4) "c2bc" bool(true)
½ -int(189) -int(189) +string(4) "c2bd" +string(4) "c2bd" bool(true)
¾ -int(190) -int(190) +string(4) "c2be" +string(4) "c2be" bool(true)
¿ -int(191) -int(191) +string(4) "c2bf" +string(4) "c2bf" bool(true)
À -int(192) -int(192) +string(4) "c380" +string(4) "c380" bool(true)
Á -int(193) -int(193) +string(4) "c381" +string(4) "c381" bool(true)
 -int(194) -int(194) +string(4) "c382" +string(4) "c382" bool(true)
à -int(195) -int(195) +string(4) "c383" +string(4) "c383" bool(true)
Ä -int(196) -int(196) +string(4) "c384" +string(4) "c384" bool(true)
Å -int(197) -int(197) +string(4) "c385" +string(4) "c385" bool(true)
Æ -int(198) -int(198) +string(4) "c386" +string(4) "c386" bool(true)
Ç -int(199) -int(199) +string(4) "c387" +string(4) "c387" bool(true)
È -int(200) -int(200) +string(4) "c388" +string(4) "c388" bool(true)
É -int(201) -int(201) +string(4) "c389" +string(4) "c389" bool(true)
Ê -int(202) -int(202) +string(4) "c38a" +string(4) "c38a" bool(true)
Ë -int(203) -int(203) +string(4) "c38b" +string(4) "c38b" bool(true)
Ì -int(204) -int(204) +string(4) "c38c" +string(4) "c38c" bool(true)
Í -int(205) -int(205) +string(4) "c38d" +string(4) "c38d" bool(true)
Î -int(206) -int(206) +string(4) "c38e" +string(4) "c38e" bool(true)
Ï -int(207) -int(207) +string(4) "c38f" +string(4) "c38f" bool(true)
Ð -int(208) -int(208) +string(4) "c390" +string(4) "c390" bool(true)
Ñ -int(209) -int(209) +string(4) "c391" +string(4) "c391" bool(true)
Ò -int(210) -int(210) +string(4) "c392" +string(4) "c392" bool(true)
Ó -int(211) -int(211) +string(4) "c393" +string(4) "c393" bool(true)
Ô -int(212) -int(212) +string(4) "c394" +string(4) "c394" bool(true)
Õ -int(213) -int(213) +string(4) "c395" +string(4) "c395" bool(true)
Ö -int(214) -int(214) +string(4) "c396" +string(4) "c396" bool(true)
× -int(215) -int(215) +string(4) "c397" +string(4) "c397" bool(true)
Ø -int(216) -int(216) +string(4) "c398" +string(4) "c398" bool(true)
Ù -int(217) -int(217) +string(4) "c399" +string(4) "c399" bool(true)
Ú -int(218) -int(218) +string(4) "c39a" +string(4) "c39a" bool(true)
Û -int(219) -int(219) +string(4) "c39b" +string(4) "c39b" bool(true)
Ü -int(220) -int(220) +string(4) "c39c" +string(4) "c39c" bool(true)
Ý -int(221) -int(221) +string(4) "c39d" +string(4) "c39d" bool(true)
Þ -int(222) -int(222) +string(4) "c39e" +string(4) "c39e" bool(true)
ß -int(223) -int(223) +string(4) "c39f" +string(4) "c39f" bool(true)
à -int(224) -int(224) +string(4) "c3a0" +string(4) "c3a0" bool(true)
á -int(225) -int(225) +string(4) "c3a1" +string(4) "c3a1" bool(true)
â -int(226) -int(226) +string(4) "c3a2" +string(4) "c3a2" bool(true)
ã -int(227) -int(227) +string(4) "c3a3" +string(4) "c3a3" bool(true)
ä -int(228) -int(228) +string(4) "c3a4" +string(4) "c3a4" bool(true)
Ã¥ -int(229) -int(229) +string(4) "c3a5" +string(4) "c3a5" bool(true)
æ -int(230) -int(230) +string(4) "c3a6" +string(4) "c3a6" bool(true)
ç -int(231) -int(231) +string(4) "c3a7" +string(4) "c3a7" bool(true)
è -int(232) -int(232) +string(4) "c3a8" +string(4) "c3a8" bool(true)
é -int(233) -int(233) +string(4) "c3a9" +string(4) "c3a9" bool(true)
ê -int(234) -int(234) +string(4) "c3aa" +string(4) "c3aa" bool(true)
ë -int(235) -int(235) +string(4) "c3ab" +string(4) "c3ab" bool(true)
ì -int(236) -int(236) +string(4) "c3ac" +string(4) "c3ac" bool(true)
í -int(237) -int(237) +string(4) "c3ad" +string(4) "c3ad" bool(true)
î -int(238) -int(238) +string(4) "c3ae" +string(4) "c3ae" bool(true)
ï -int(239) -int(239) +string(4) "c3af" +string(4) "c3af" bool(true)
ð -int(240) -int(240) +string(4) "c3b0" +string(4) "c3b0" bool(true)
ñ -int(241) -int(241) +string(4) "c3b1" +string(4) "c3b1" bool(true)
ò -int(242) -int(242) +string(4) "c3b2" +string(4) "c3b2" bool(true)
ó -int(243) -int(243) +string(4) "c3b3" +string(4) "c3b3" bool(true)
ô -int(244) -int(244) +string(4) "c3b4" +string(4) "c3b4" bool(true)
õ -int(245) -int(245) +string(4) "c3b5" +string(4) "c3b5" bool(true)
ö -int(246) -int(246) +string(4) "c3b6" +string(4) "c3b6" bool(true)
÷ -int(247) -int(247) +string(4) "c3b7" +string(4) "c3b7" bool(true)
ø -int(248) -int(248) +string(4) "c3b8" +string(4) "c3b8" bool(true)
ù -int(249) -int(249) +string(4) "c3b9" +string(4) "c3b9" bool(true)
ú -int(250) -int(250) +string(4) "c3ba" +string(4) "c3ba" bool(true)
û -int(251) -int(251) +string(4) "c3bb" +string(4) "c3bb" bool(true)
ü -int(252) -int(252) +string(4) "c3bc" +string(4) "c3bc" bool(true)
ý -int(253) -int(253) +string(4) "c3bd" +string(4) "c3bd" bool(true)
þ -int(254) -int(254) +string(4) "c3be" +string(4) "c3be" bool(true)
ÿ -int(255) -int(255) -bool(true) \ No newline at end of file +string(4) "c3bf" +string(4) "c3bf" +bool(true) diff --git a/ext/wddx/tests/bug37587.phpt b/ext/wddx/tests/bug37587.phpt index 5361d38dae..7780355b8e 100755 --- a/ext/wddx/tests/bug37587.phpt +++ b/ext/wddx/tests/bug37587.phpt @@ -5,8 +5,8 @@ Bug #37587 (var without attribute causes segfault) --FILE-- +var_dump(wddx_deserialize(<<
@@ -19,7 +19,7 @@ data:, EOF -))); +)); ?> ===DONE=== diff --git a/ext/wddx/wddx.c b/ext/wddx/wddx.c index 46ee3bba8e..ca845bd171 100644 --- a/ext/wddx/wddx.c +++ b/ext/wddx/wddx.c @@ -371,16 +371,14 @@ static void php_wddx_serialize_string(wddx_packet *packet, zval *var TSRMLS_DC) php_wddx_add_chunk_static(packet, WDDX_STRING_S); if (Z_STRLEN_P(var) > 0) { - char *buf, *enc; - int buf_len, enc_len; + char *buf; + int buf_len; buf = php_escape_html_entities(Z_STRVAL_P(var), Z_STRLEN_P(var), &buf_len, 0, ENT_QUOTES, NULL TSRMLS_CC); - enc = xml_utf8_encode(buf, buf_len, &enc_len, "ISO-8859-1"); - php_wddx_add_chunk_ex(packet, enc, enc_len); + php_wddx_add_chunk_ex(packet, buf, buf_len); efree(buf); - efree(enc); } php_wddx_add_chunk_static(packet, WDDX_STRING_E); } @@ -786,10 +784,7 @@ static void php_wddx_push_element(void *user_data, const XML_Char *name, const X if (atts) for (i = 0; atts[i]; i++) { if (!strcmp(atts[i], EL_NAME) && atts[++i] && atts[i][0]) { - char *decoded; - int decoded_len; - decoded = xml_utf8_decode(atts[i], strlen(atts[i]), &decoded_len, "ISO-8859-1"); - stack->varname = decoded; + stack->varname = estrdup(atts[i]); break; } } @@ -806,12 +801,9 @@ static void php_wddx_push_element(void *user_data, const XML_Char *name, const X zval *tmp; char *key; char *p1, *p2, *endp; - char *decoded; - int decoded_len; - decoded = xml_utf8_decode(atts[i], strlen(atts[i]), &decoded_len, "ISO-8859-1"); - endp = (char *)decoded + decoded_len; - p1 = (char *)decoded; + endp = (char *)atts[i] + strlen(atts[i]); + p1 = (char *)atts[i]; while ((p2 = php_memnstr(p1, ",", sizeof(",")-1, endp)) != NULL) { key = estrndup(p1, p2 - p1); MAKE_STD_ZVAL(tmp); @@ -827,7 +819,6 @@ static void php_wddx_push_element(void *user_data, const XML_Char *name, const X add_assoc_zval_ex(ent.data, p1, endp - p1 + 1, tmp); } - efree(decoded); break; } } @@ -843,19 +834,15 @@ static void php_wddx_push_element(void *user_data, const XML_Char *name, const X if (atts) for (i = 0; atts[i]; i++) { if (!strcmp(atts[i], EL_NAME) && atts[++i] && atts[i][0]) { - char *decoded; - int decoded_len; st_entry *recordset; zval **field; - decoded = xml_utf8_decode(atts[i], strlen(atts[i]), &decoded_len, "ISO-8859-1"); if (wddx_stack_top(stack, (void**)&recordset) == SUCCESS && recordset->type == ST_RECORDSET && - zend_hash_find(Z_ARRVAL_P(recordset->data), decoded, decoded_len+1, (void**)&field) == SUCCESS) { + zend_hash_find(Z_ARRVAL_P(recordset->data), (char*)atts[i], strlen(atts[i])+1, (void**)&field) == SUCCESS) { ent.data = *field; } - efree(decoded); break; } } @@ -1006,28 +993,22 @@ static void php_wddx_process_data(void *user_data, const XML_Char *s, int len) { st_entry *ent; wddx_stack *stack = (wddx_stack *)user_data; - char *decoded; - int decoded_len; TSRMLS_FETCH(); if (!wddx_stack_is_empty(stack) && !stack->done) { wddx_stack_top(stack, (void**)&ent); switch (Z_TYPE_P(ent)) { case ST_STRING: - decoded = xml_utf8_decode(s, len, &decoded_len, "ISO-8859-1"); - if (Z_STRLEN_P(ent->data) == 0) { STR_FREE(Z_STRVAL_P(ent->data)); - Z_STRVAL_P(ent->data) = estrndup(decoded, decoded_len); - Z_STRLEN_P(ent->data) = decoded_len; + Z_STRVAL_P(ent->data) = estrndup(s, len); + Z_STRLEN_P(ent->data) = len; } else { - Z_STRVAL_P(ent->data) = erealloc(Z_STRVAL_P(ent->data), Z_STRLEN_P(ent->data) + decoded_len + 1); - memcpy(Z_STRVAL_P(ent->data) + Z_STRLEN_P(ent->data), decoded, decoded_len); - Z_STRLEN_P(ent->data) += decoded_len; + Z_STRVAL_P(ent->data) = erealloc(Z_STRVAL_P(ent->data), Z_STRLEN_P(ent->data) + len + 1); + memcpy(Z_STRVAL_P(ent->data) + Z_STRLEN_P(ent->data), s, len); + Z_STRLEN_P(ent->data) += len; Z_STRVAL_P(ent->data)[Z_STRLEN_P(ent->data)] = '\0'; } - - efree(decoded); break; case ST_BINARY: @@ -1098,7 +1079,7 @@ int php_wddx_deserialize_ex(char *value, int vallen, zval *return_value) int retval; wddx_stack_init(&stack); - parser = XML_ParserCreate("ISO-8859-1"); + parser = XML_ParserCreate("UTF-8"); XML_SetUserData(parser, &stack); XML_SetElementHandler(parser, php_wddx_push_element, php_wddx_pop_element); -- 2.40.0