From: Stanislav Malyshev Date: Tue, 8 Nov 2011 04:59:17 +0000 (+0000) Subject: Fixed bug #54084 (token_get_all with regards to __halt_compiler is not binary safe) X-Git-Tag: php-5.4.0RC1~13 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=5063a77128b44abe61742935de4ac86dc7351b68;p=php Fixed bug #54084 (token_get_all with regards to __halt_compiler is not binary safe) --- diff --git a/ext/tokenizer/tests/bug54089.phpt b/ext/tokenizer/tests/bug54089.phpt index e1f6d79e9c..77a4c1f324 100644 --- a/ext/tokenizer/tests/bug54089.phpt +++ b/ext/tokenizer/tests/bug54089.phpt @@ -4,17 +4,26 @@ Bug #54089 (token_get_all() does not stop after __halt_compiler) --FILE-- \x02"; -$tokens = token_get_all($code); - -var_dump($tokens); - -$code = ''; -foreach ($tokens as $t) -{ - $code .= isset($t[1]) ? $t[1] : $t; +$codes = array( + " --EXPECTF-- array(2) { @@ -38,3 +47,248 @@ array(2) { } } string(21) " + array(3) { + [0]=> + int(%d) + [1]=> + string(6) " + int(1) + } + [1]=> + array(3) { + [0]=> + int(%d) + [1]=> + string(15) "__halt_compiler" + [2]=> + int(1) + } + [2]=> + string(1) "(" +} +string(22) " + array(3) { + [0]=> + int(%d) + [1]=> + string(6) " + int(1) + } + [1]=> + array(3) { + [0]=> + int(%d) + [1]=> + string(15) "__halt_compiler" + [2]=> + int(1) + } + [2]=> + string(1) "(" + [3]=> + string(1) ")" + [4]=> + string(1) ";" +} +string(24) " + array(3) { + [0]=> + int(%d) + [1]=> + string(6) " + int(1) + } + [1]=> + array(3) { + [0]=> + int(%d) + [1]=> + string(15) "__halt_compiler" + [2]=> + int(1) + } + [2]=> + string(1) "(" + [3]=> + string(1) ")" + [4]=> + string(1) ";" + [5]=> + array(3) { + [0]=> + int(%d) + [1]=> + string(3) "ABC" + [2]=> + int(1) + } +} +string(27) " + array(3) { + [0]=> + int(%d) + [1]=> + string(6) " + int(1) + } + [1]=> + array(3) { + [0]=> + int(%d) + [1]=> + string(15) "__halt_compiler" + [2]=> + int(1) + } + [2]=> + array(3) { + [0]=> + int(%d) + [1]=> + string(1) " +" + [2]=> + int(1) + } + [3]=> + string(1) "(" + [4]=> + array(3) { + [0]=> + int(%d) + [1]=> + string(1) " +" + [2]=> + int(2) + } + [5]=> + string(1) ")" + [6]=> + array(3) { + [0]=> + int(%d) + [1]=> + string(1) " +" + [2]=> + int(3) + } + [7]=> + string(1) ";" + [8]=> + array(3) { + [0]=> + int(%d) + [1]=> + string(3) "ABC" + [2]=> + int(4) + } +} +string(30) " + array(3) { + [0]=> + int(%d) + [1]=> + string(6) " + int(1) + } + [1]=> + array(3) { + [0]=> + int(%d) + [1]=> + string(15) "__halt_compiler" + [2]=> + int(1) + } + [2]=> + array(3) { + [0]=> + int(%d) + [1]=> + string(1) " +" + [2]=> + int(1) + } + [3]=> + array(3) { + [0]=> + int(%d) + [1]=> + string(3) "abc" + [2]=> + int(2) + } + [4]=> + array(3) { + [0]=> + int(%d) + [1]=> + string(1) " +" + [2]=> + int(2) + } + [5]=> + array(3) { + [0]=> + int(%d) + [1]=> + string(3) "def" + [2]=> + int(3) + } + [6]=> + array(3) { + [0]=> + int(%d) + [1]=> + string(1) " +" + [2]=> + int(3) + } + [7]=> + array(3) { + [0]=> + int(%d) + [1]=> + string(3) "ghi" + [2]=> + int(4) + } + [8]=> + array(3) { + [0]=> + int(%d) + [1]=> + string(4) " ABC" + [2]=> + int(4) + } +} +string(37) "'; $tokens = token_get_all($source); var_dump($tokens); -echo "Done" +echo "Done"; ?> --EXPECTF-- *** Testing token_get_all() : with different function constructs *** -array(135) { +array(142) { [0]=> array(3) { [0]=> @@ -958,9 +958,46 @@ array(135) { [0]=> int(%d) [1]=> - string(15) "__halt_compiler" + string(10) "myFunction" + [2]=> + int(26) + } + [135]=> + string(1) "(" + [136]=> + string(1) ")" + [137]=> + string(1) ";" + [138]=> + array(3) { + [0]=> + int(%d) + [1]=> + string(1) " +" [2]=> int(26) } + [139]=> + string(1) "}" + [140]=> + array(3) { + [0]=> + int(%d) + [1]=> + string(1) " +" + [2]=> + int(27) + } + [141]=> + array(3) { + [0]=> + int(%d) + [1]=> + string(2) "?>" + [2]=> + int(28) + } } Done diff --git a/ext/tokenizer/tokenizer.c b/ext/tokenizer/tokenizer.c index c6c901f352..afdd85b4c3 100644 --- a/ext/tokenizer/tokenizer.c +++ b/ext/tokenizer/tokenizer.c @@ -32,8 +32,10 @@ #include "zend_language_scanner_defs.h" #include -#define zendtext LANG_SCNG(yy_text) -#define zendleng LANG_SCNG(yy_leng) +#define zendtext LANG_SCNG(yy_text) +#define zendleng LANG_SCNG(yy_leng) +#define zendcursor LANG_SCNG(yy_cursor) +#define zendlimit LANG_SCNG(yy_limit) /* {{{ arginfo */ ZEND_BEGIN_ARG_INFO_EX(arginfo_token_get_all, 0, 0, 1) @@ -106,6 +108,7 @@ static void tokenize(zval *return_value TSRMLS_DC) int token_type; zend_bool destroy; int token_line = 1; + int need_tokens = -1; // for __halt_compiler lexing. -1 = disabled array_init(return_value); @@ -150,11 +153,28 @@ static void tokenize(zval *return_value TSRMLS_DC) } ZVAL_NULL(&token); - token_line = CG(zend_lineno); - - if (token_type == T_HALT_COMPILER) { - break; + // after T_HALT_COMPILER collect the next three non-dropped tokens + if (need_tokens != -1) { + if (token_type != T_WHITESPACE && token_type != T_OPEN_TAG + && token_type != T_COMMENT && token_type != T_DOC_COMMENT + && --need_tokens == 0 + ) { + // fetch the rest into a T_INLINE_HTML + if (zendcursor != zendlimit) { + MAKE_STD_ZVAL(keyword); + array_init(keyword); + add_next_index_long(keyword, T_INLINE_HTML); + add_next_index_stringl(keyword, (char *)zendcursor, zendlimit - zendcursor, 1); + add_next_index_long(keyword, token_line); + add_next_index_zval(return_value, keyword); + } + break; + } + } else if (token_type == T_HALT_COMPILER) { + need_tokens = 3; } + + token_line = CG(zend_lineno); } }