]> granicus.if.org Git - php/commitdiff
ext tokenizer port + cleanup unused lexer states
authorMárcio Almada <marcio3w@gmail.com>
Sun, 5 Apr 2015 11:50:35 +0000 (08:50 -0300)
committerMárcio Almada <marcio3w@gmail.com>
Thu, 30 Apr 2015 06:03:29 +0000 (03:03 -0300)
we basically added a mechanism to store the token stream during parsing
and exposed the entire parser stack on the tokenizer extension through
an opt in flag: token_get_all($src, TOKEN_PARSE).

this change allows easy future language enhancements regarding context
aware parsing & scanning without further maintenance on the tokenizer
extension, while also solving known inconsistencies the "parseless" tokenizer
extension has when it handles the presence of `__halt_compiler()`.

Zend/zend_compile.c
Zend/zend_globals.h
Zend/zend_language_parser.y
Zend/zend_language_scanner.h
Zend/zend_language_scanner.l
ext/tokenizer/tests/token_get_all_TOKEN_PARSE_000.phpt [new file with mode: 0644]
ext/tokenizer/tests/token_get_all_TOKEN_PARSE_001.phpt [new file with mode: 0644]
ext/tokenizer/tests/token_get_all_TOKEN_PARSE_002.phpt [new file with mode: 0644]
ext/tokenizer/tests/token_get_all_error.phpt
ext/tokenizer/tokenizer.c

index c92a25a705389c043b33d02741a2ae3f65d53b06..210810379f58f8c1eb3ff28e61b14a6f2cf59490 100644 (file)
@@ -30,7 +30,6 @@
 #include "zend_interfaces.h"
 #include "zend_virtual_cwd.h"
 #include "zend_multibyte.h"
-#include "zend_language_scanner.h"
 #include "zend_inheritance.h"
 
 #define SET_NODE(target, src) do { \
@@ -568,7 +567,10 @@ static int zend_add_const_name_literal(zend_op_array *op_array, zend_string *nam
                op.constant = zend_add_literal(CG(active_op_array), &_c); \
        } while (0)
 
-void zend_stop_lexing(void) {
+void zend_stop_lexing(void)
+{
+       if(LANG_SCNG(on_event)) LANG_SCNG(on_event)(ON_STOP, END, 0);
+
        LANG_SCNG(yy_cursor) = LANG_SCNG(yy_limit);
 }
 
index 326955a103b3cdc34456fd4aaca4bce0267c7237..28487a2a4a1859894569efc42f9fd9fa0b179650 100644 (file)
@@ -249,6 +249,12 @@ struct _zend_ini_scanner_globals {
        int scanner_mode;
 };
 
+typedef enum {
+       ON_TOKEN,
+       ON_FEEDBACK,
+       ON_STOP
+} zend_php_scanner_event;
+
 struct _zend_php_scanner_globals {
        zend_file_handle *yy_in;
        zend_file_handle *yy_out;
@@ -278,6 +284,9 @@ struct _zend_php_scanner_globals {
 
        /* initial string length after scanning to first variable */
        int scanned_string_len;
+
+       /* hooks */
+       void (* on_event)(zend_php_scanner_event event, int token, int line);
 };
 
 #endif /* ZEND_GLOBALS_H */
index cefcd0cad928e936ae42b1610d218afee70cbf3b..f6318ec0c0e33e825298e58de208fd2472f4fcf4 100644 (file)
@@ -35,7 +35,7 @@
 #include "zend_globals.h"
 #include "zend_API.h"
 #include "zend_constants.h"
-#include "zend_language_scanner_defs.h"
+#include "zend_language_scanner.h"
 
 #define YYSIZE_T size_t
 #define yytnamerr zend_yytnamerr
@@ -49,12 +49,6 @@ static YYSIZE_T zend_yytnamerr(char*, const char*);
 #define YYFREE free
 #endif
 
-#define REWIND { \
-       zend_stack_push(&LANG_SCNG(state_stack), (void *) &LANG_SCNG(yy_state)); \
-       LANG_SCNG(yy_state) = yycST_LOOKING_FOR_SEMI_RESERVED_NAME; \
-       LANG_SCNG(yy_cursor) = (unsigned char*)LANG_SCNG(yy_text); \
-       LANG_SCNG(yy_leng)   = 0; }
-
 %}
 
 %pure_parser
@@ -290,7 +284,11 @@ semi_reserved:
 
 identifier:
                T_STRING { $$ = $1; }
-       |       /* if */ semi_reserved { REWIND } /* and rematch as */ T_STRING { $$ = $3; }
+       |       semi_reserved  {
+                       zval zv;
+                       zend_lex_tstring(&zv);
+                       $$ = zend_ast_create_zval(&zv);
+               }
 ;
 
 top_statement_list:
index c82b3069c5906c8c30742cbc38d0ccc0c42f8f0b..3b75ff8cc45a0a9301dcc132b554f76f7696f8d1 100644 (file)
@@ -50,6 +50,9 @@ typedef struct _zend_lex_state {
        zend_encoding_filter output_filter;
        const zend_encoding *script_encoding;
 
+       /* hooks */
+       void (* on_event)(zend_php_scanner_event event, int token, int line);
+
        zend_ast *ast;
        zend_arena *ast_arena;
 } zend_lex_state;
@@ -66,6 +69,7 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state);
 ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename);
 ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding);
 ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding);
+ZEND_API void zend_lex_tstring(zval *zv);
 
 END_EXTERN_C()
 
index 2481af605b7df70f83372df76e11751ed42a9f6e..cde0621df0e6cff2505750f263af6f70d8f5a2b0 100644 (file)
@@ -193,6 +193,7 @@ void shutdown_scanner(void)
        zend_stack_destroy(&SCNG(state_stack));
        zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
        zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
+       SCNG(on_event) = NULL;
 }
 
 ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
@@ -223,6 +224,8 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
        lex_state->output_filter = SCNG(output_filter);
        lex_state->script_encoding = SCNG(script_encoding);
 
+       lex_state->on_event = SCNG(on_event);
+
        lex_state->ast = CG(ast);
        lex_state->ast_arena = CG(ast_arena);
 }
@@ -260,6 +263,8 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)
        SCNG(output_filter) = lex_state->output_filter;
        SCNG(script_encoding) = lex_state->script_encoding;
 
+       SCNG(on_event) = lex_state->on_event;
+
        CG(ast) = lex_state->ast;
        CG(ast_arena) = lex_state->ast_arena;
 
@@ -276,6 +281,13 @@ ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle)
        }
 }
 
+ZEND_API void zend_lex_tstring(zval *zv)
+{
+       if (SCNG(on_event)) SCNG(on_event)(ON_FEEDBACK, T_STRING, 0);
+
+       ZVAL_STRINGL(zv, (char*)SCNG(yy_text), SCNG(yy_leng));
+}
+
 #define BOM_UTF32_BE   "\x00\x00\xfe\xff"
 #define        BOM_UTF32_LE    "\xff\xfe\x00\x00"
 #define        BOM_UTF16_BE    "\xfe\xff"
@@ -1083,9 +1095,20 @@ static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quot
        return SUCCESS;
 }
 
+static zend_always_inline int emit_token(int token, int token_line)
+{
+       if(SCNG(on_event)) SCNG(on_event)(ON_TOKEN, token, token_line);
+
+       return token;
+}
+
+#define RETURN_TOKEN(token) return emit_token(token, start_line);
 
 int lex_scan(zval *zendlval)
 {
+
+int start_line = CG(zend_lineno);
+
 restart:
        SCNG(yy_text) = YYCURSOR;
 
@@ -1107,183 +1130,183 @@ NEWLINE ("\r"|"\n"|"\r\n")
 <!*> := yyleng = YYCURSOR - SCNG(yy_text);
 
 <ST_IN_SCRIPTING>"exit" {
-       return T_EXIT;
+       RETURN_TOKEN(T_EXIT);
 }
 
 <ST_IN_SCRIPTING>"die" {
-       return T_EXIT;
+       RETURN_TOKEN(T_EXIT);
 }
 
 <ST_IN_SCRIPTING>"function" {
-       return T_FUNCTION;
+       RETURN_TOKEN(T_FUNCTION);
 }
 
 <ST_IN_SCRIPTING>"const" {
-       return T_CONST;
+       RETURN_TOKEN(T_CONST);
 }
 
 <ST_IN_SCRIPTING>"return" {
-       return T_RETURN;
+       RETURN_TOKEN(T_RETURN);
 }
 
 <ST_IN_SCRIPTING>"yield"{WHITESPACE}"from" {
-       return T_YIELD_FROM;
+       RETURN_TOKEN(T_YIELD_FROM);
 }
 
 <ST_IN_SCRIPTING>"yield" {
-       return T_YIELD;
+       RETURN_TOKEN(T_YIELD);
 }
 
 <ST_IN_SCRIPTING>"try" {
-       return T_TRY;
+       RETURN_TOKEN(T_TRY);
 }
 
 <ST_IN_SCRIPTING>"catch" {
-       return T_CATCH;
+       RETURN_TOKEN(T_CATCH);
 }
 
 <ST_IN_SCRIPTING>"finally" {
-       return T_FINALLY;
+       RETURN_TOKEN(T_FINALLY);
 }
 
 <ST_IN_SCRIPTING>"throw" {
-       return T_THROW;
+       RETURN_TOKEN(T_THROW);
 }
 
 <ST_IN_SCRIPTING>"if" {
-       return T_IF;
+       RETURN_TOKEN(T_IF);
 }
 
 <ST_IN_SCRIPTING>"elseif" {
-       return T_ELSEIF;
+       RETURN_TOKEN(T_ELSEIF);
 }
 
 <ST_IN_SCRIPTING>"endif" {
-       return T_ENDIF;
+       RETURN_TOKEN(T_ENDIF);
 }
 
 <ST_IN_SCRIPTING>"else" {
-       return T_ELSE;
+       RETURN_TOKEN(T_ELSE);
 }
 
 <ST_IN_SCRIPTING>"while" {
-       return T_WHILE;
+       RETURN_TOKEN(T_WHILE);
 }
 
 <ST_IN_SCRIPTING>"endwhile" {
-       return T_ENDWHILE;
+       RETURN_TOKEN(T_ENDWHILE);
 }
 
 <ST_IN_SCRIPTING>"do" {
-       return T_DO;
+       RETURN_TOKEN(T_DO);
 }
 
 <ST_IN_SCRIPTING>"for" {
-       return T_FOR;
+       RETURN_TOKEN(T_FOR);
 }
 
 <ST_IN_SCRIPTING>"endfor" {
-       return T_ENDFOR;
+       RETURN_TOKEN(T_ENDFOR);
 }
 
 <ST_IN_SCRIPTING>"foreach" {
-       return T_FOREACH;
+       RETURN_TOKEN(T_FOREACH);
 }
 
 <ST_IN_SCRIPTING>"endforeach" {
-       return T_ENDFOREACH;
+       RETURN_TOKEN(T_ENDFOREACH);
 }
 
 <ST_IN_SCRIPTING>"declare" {
-       return T_DECLARE;
+       RETURN_TOKEN(T_DECLARE);
 }
 
 <ST_IN_SCRIPTING>"enddeclare" {
-       return T_ENDDECLARE;
+       RETURN_TOKEN(T_ENDDECLARE);
 }
 
 <ST_IN_SCRIPTING>"instanceof" {
-       return T_INSTANCEOF;
+       RETURN_TOKEN(T_INSTANCEOF);
 }
 
 <ST_IN_SCRIPTING>"as" {
-       return T_AS;
+       RETURN_TOKEN(T_AS);
 }
 
 <ST_IN_SCRIPTING>"switch" {
-       return T_SWITCH;
+       RETURN_TOKEN(T_SWITCH);
 }
 
 <ST_IN_SCRIPTING>"endswitch" {
-       return T_ENDSWITCH;
+       RETURN_TOKEN(T_ENDSWITCH);
 }
 
 <ST_IN_SCRIPTING>"case" {
-       return T_CASE;
+       RETURN_TOKEN(T_CASE);
 }
 
 <ST_IN_SCRIPTING>"default" {
-       return T_DEFAULT;
+       RETURN_TOKEN(T_DEFAULT);
 }
 
 <ST_IN_SCRIPTING>"break" {
-       return T_BREAK;
+       RETURN_TOKEN(T_BREAK);
 }
 
 <ST_IN_SCRIPTING>"continue" {
-       return T_CONTINUE;
+       RETURN_TOKEN(T_CONTINUE);
 }
 
 <ST_IN_SCRIPTING>"goto" {
-       return T_GOTO;
+       RETURN_TOKEN(T_GOTO);
 }
 
 <ST_IN_SCRIPTING>"echo" {
-       return T_ECHO;
+       RETURN_TOKEN(T_ECHO);
 }
 
 <ST_IN_SCRIPTING>"print" {
-       return T_PRINT;
+       RETURN_TOKEN(T_PRINT);
 }
 
 <ST_IN_SCRIPTING>"class" {
-       return T_CLASS;
+       RETURN_TOKEN(T_CLASS);
 }
 
 <ST_IN_SCRIPTING>"interface" {
-       return T_INTERFACE;
+       RETURN_TOKEN(T_INTERFACE);
 }
 
 <ST_IN_SCRIPTING>"trait" {
-       return T_TRAIT;
+       RETURN_TOKEN(T_TRAIT);
 }
 
 <ST_IN_SCRIPTING>"extends" {
-       return T_EXTENDS;
+       RETURN_TOKEN(T_EXTENDS);
 }
 
 <ST_IN_SCRIPTING>"implements" {
-       return T_IMPLEMENTS;
+       RETURN_TOKEN(T_IMPLEMENTS);
 }
 
 <ST_IN_SCRIPTING>"->" {
        yy_push_state(ST_LOOKING_FOR_PROPERTY);
-       return T_OBJECT_OPERATOR;
+       RETURN_TOKEN(T_OBJECT_OPERATOR);
 }
 
-<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY,ST_LOOKING_FOR_SEMI_RESERVED_NAME>{WHITESPACE}+ {
+<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
        HANDLE_NEWLINES(yytext, yyleng);
-       return T_WHITESPACE;
+       RETURN_TOKEN(T_WHITESPACE);
 }
 
 <ST_LOOKING_FOR_PROPERTY>"->" {
-       return T_OBJECT_OPERATOR;
+       RETURN_TOKEN(T_OBJECT_OPERATOR);
 }
 
 <ST_LOOKING_FOR_PROPERTY>{LABEL} {
        yy_pop_state();
        zend_copy_value(zendlval, yytext, yyleng);
-       return T_STRING;
+       RETURN_TOKEN(T_STRING);
 }
 
 <ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
@@ -1293,283 +1316,283 @@ NEWLINE ("\r"|"\n"|"\r\n")
 }
 
 <ST_IN_SCRIPTING>"::" {
-       return T_PAAMAYIM_NEKUDOTAYIM;
+       RETURN_TOKEN(T_PAAMAYIM_NEKUDOTAYIM);
 }
 
 <ST_IN_SCRIPTING>"\\" {
-       return T_NS_SEPARATOR;
+       RETURN_TOKEN(T_NS_SEPARATOR);
 }
 
 <ST_IN_SCRIPTING>"..." {
-       return T_ELLIPSIS;
+       RETURN_TOKEN(T_ELLIPSIS);
 }
 
 <ST_IN_SCRIPTING>"??" {
-       return T_COALESCE;
+       RETURN_TOKEN(T_COALESCE);
 }
 
 <ST_IN_SCRIPTING>"new" {
-       return T_NEW;
+       RETURN_TOKEN(T_NEW);
 }
 
 <ST_IN_SCRIPTING>"clone" {
-       return T_CLONE;
+       RETURN_TOKEN(T_CLONE);
 }
 
 <ST_IN_SCRIPTING>"var" {
-       return T_VAR;
+       RETURN_TOKEN(T_VAR);
 }
 
 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
-       return T_INT_CAST;
+       RETURN_TOKEN(T_INT_CAST);
 }
 
 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
-       return T_DOUBLE_CAST;
+       RETURN_TOKEN(T_DOUBLE_CAST);
 }
 
 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
-       return T_STRING_CAST;
+       RETURN_TOKEN(T_STRING_CAST);
 }
 
 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
-       return T_ARRAY_CAST;
+       RETURN_TOKEN(T_ARRAY_CAST);
 }
 
 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
-       return T_OBJECT_CAST;
+       RETURN_TOKEN(T_OBJECT_CAST);
 }
 
 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
-       return T_BOOL_CAST;
+       RETURN_TOKEN(T_BOOL_CAST);
 }
 
 <ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
-       return T_UNSET_CAST;
+       RETURN_TOKEN(T_UNSET_CAST);
 }
 
 <ST_IN_SCRIPTING>"eval" {
-       return T_EVAL;
+       RETURN_TOKEN(T_EVAL);
 }
 
 <ST_IN_SCRIPTING>"include" {
-       return T_INCLUDE;
+       RETURN_TOKEN(T_INCLUDE);
 }
 
 <ST_IN_SCRIPTING>"include_once" {
-       return T_INCLUDE_ONCE;
+       RETURN_TOKEN(T_INCLUDE_ONCE);
 }
 
 <ST_IN_SCRIPTING>"require" {
-       return T_REQUIRE;
+       RETURN_TOKEN(T_REQUIRE);
 }
 
 <ST_IN_SCRIPTING>"require_once" {
-       return T_REQUIRE_ONCE;
+       RETURN_TOKEN(T_REQUIRE_ONCE);
 }
 
 <ST_IN_SCRIPTING>"namespace" {
-       return T_NAMESPACE;
+       RETURN_TOKEN(T_NAMESPACE);
 }
 
 <ST_IN_SCRIPTING>"use" {
-       return T_USE;
+       RETURN_TOKEN(T_USE);
 }
 
 <ST_IN_SCRIPTING>"insteadof" {
-        return T_INSTEADOF;
+    RETURN_TOKEN(T_INSTEADOF);
 }
 
 <ST_IN_SCRIPTING>"global" {
-       return T_GLOBAL;
+       RETURN_TOKEN(T_GLOBAL);
 }
 
 <ST_IN_SCRIPTING>"isset" {
-       return T_ISSET;
+       RETURN_TOKEN(T_ISSET);
 }
 
 <ST_IN_SCRIPTING>"empty" {
-       return T_EMPTY;
+       RETURN_TOKEN(T_EMPTY);
 }
 
 <ST_IN_SCRIPTING>"__halt_compiler" {
-       return T_HALT_COMPILER;
+       RETURN_TOKEN(T_HALT_COMPILER);
 }
 
 <ST_IN_SCRIPTING>"static" {
-       return T_STATIC;
+       RETURN_TOKEN(T_STATIC);
 }
 
 <ST_IN_SCRIPTING>"abstract" {
-       return T_ABSTRACT;
+       RETURN_TOKEN(T_ABSTRACT);
 }
 
 <ST_IN_SCRIPTING>"final" {
-       return T_FINAL;
+       RETURN_TOKEN(T_FINAL);
 }
 
 <ST_IN_SCRIPTING>"private" {
-       return T_PRIVATE;
+       RETURN_TOKEN(T_PRIVATE);
 }
 
 <ST_IN_SCRIPTING>"protected" {
-       return T_PROTECTED;
+       RETURN_TOKEN(T_PROTECTED);
 }
 
 <ST_IN_SCRIPTING>"public" {
-       return T_PUBLIC;
+       RETURN_TOKEN(T_PUBLIC);
 }
 
 <ST_IN_SCRIPTING>"unset" {
-       return T_UNSET;
+       RETURN_TOKEN(T_UNSET);
 }
 
 <ST_IN_SCRIPTING>"=>" {
-       return T_DOUBLE_ARROW;
+       RETURN_TOKEN(T_DOUBLE_ARROW);
 }
 
 <ST_IN_SCRIPTING>"list" {
-       return T_LIST;
+       RETURN_TOKEN(T_LIST);
 }
 
 <ST_IN_SCRIPTING>"array" {
-       return T_ARRAY;
+       RETURN_TOKEN(T_ARRAY);
 }
 
 <ST_IN_SCRIPTING>"callable" {
-       return T_CALLABLE;
+       RETURN_TOKEN(T_CALLABLE);
 }
 
 <ST_IN_SCRIPTING>"++" {
-       return T_INC;
+       RETURN_TOKEN(T_INC);
 }
 
 <ST_IN_SCRIPTING>"--" {
-       return T_DEC;
+       RETURN_TOKEN(T_DEC);
 }
 
 <ST_IN_SCRIPTING>"===" {
-       return T_IS_IDENTICAL;
+       RETURN_TOKEN(T_IS_IDENTICAL);
 }
 
 <ST_IN_SCRIPTING>"!==" {
-       return T_IS_NOT_IDENTICAL;
+       RETURN_TOKEN(T_IS_NOT_IDENTICAL);
 }
 
 <ST_IN_SCRIPTING>"==" {
-       return T_IS_EQUAL;
+       RETURN_TOKEN(T_IS_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"!="|"<>" {
-       return T_IS_NOT_EQUAL;
+       RETURN_TOKEN(T_IS_NOT_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"<=>" {
-       return T_SPACESHIP;
+       RETURN_TOKEN(T_SPACESHIP);
 }
 
 <ST_IN_SCRIPTING>"<=" {
-       return T_IS_SMALLER_OR_EQUAL;
+       RETURN_TOKEN(T_IS_SMALLER_OR_EQUAL);
 }
 
 <ST_IN_SCRIPTING>">=" {
-       return T_IS_GREATER_OR_EQUAL;
+       RETURN_TOKEN(T_IS_GREATER_OR_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"+=" {
-       return T_PLUS_EQUAL;
+       RETURN_TOKEN(T_PLUS_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"-=" {
-       return T_MINUS_EQUAL;
+       RETURN_TOKEN(T_MINUS_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"*=" {
-       return T_MUL_EQUAL;
+       RETURN_TOKEN(T_MUL_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"*\*" {
-       return T_POW;
+       RETURN_TOKEN(T_POW);
 }
 
 <ST_IN_SCRIPTING>"*\*=" {
-       return T_POW_EQUAL;
+       RETURN_TOKEN(T_POW_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"/=" {
-       return T_DIV_EQUAL;
+       RETURN_TOKEN(T_DIV_EQUAL);
 }
 
 <ST_IN_SCRIPTING>".=" {
-       return T_CONCAT_EQUAL;
+       RETURN_TOKEN(T_CONCAT_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"%=" {
-       return T_MOD_EQUAL;
+       RETURN_TOKEN(T_MOD_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"<<=" {
-       return T_SL_EQUAL;
+       RETURN_TOKEN(T_SL_EQUAL);
 }
 
 <ST_IN_SCRIPTING>">>=" {
-       return T_SR_EQUAL;
+       RETURN_TOKEN(T_SR_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"&=" {
-       return T_AND_EQUAL;
+       RETURN_TOKEN(T_AND_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"|=" {
-       return T_OR_EQUAL;
+       RETURN_TOKEN(T_OR_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"^=" {
-       return T_XOR_EQUAL;
+       RETURN_TOKEN(T_XOR_EQUAL);
 }
 
 <ST_IN_SCRIPTING>"||" {
-       return T_BOOLEAN_OR;
+       RETURN_TOKEN(T_BOOLEAN_OR);
 }
 
 <ST_IN_SCRIPTING>"&&" {
-       return T_BOOLEAN_AND;
+       RETURN_TOKEN(T_BOOLEAN_AND);
 }
 
 <ST_IN_SCRIPTING>"OR" {
-       return T_LOGICAL_OR;
+       RETURN_TOKEN(T_LOGICAL_OR);
 }
 
 <ST_IN_SCRIPTING>"AND" {
-       return T_LOGICAL_AND;
+       RETURN_TOKEN(T_LOGICAL_AND);
 }
 
 <ST_IN_SCRIPTING>"XOR" {
-       return T_LOGICAL_XOR;
+       RETURN_TOKEN(T_LOGICAL_XOR);
 }
 
 <ST_IN_SCRIPTING>"<<" {
-       return T_SL;
+       RETURN_TOKEN(T_SL);
 }
 
 <ST_IN_SCRIPTING>">>" {
-       return T_SR;
+       RETURN_TOKEN(T_SR);
 }
 
 <ST_IN_SCRIPTING>{TOKENS} {
-       return yytext[0];
+       RETURN_TOKEN(yytext[0]);
 }
 
 
 <ST_IN_SCRIPTING>"{" {
        yy_push_state(ST_IN_SCRIPTING);
-       return '{';
+       RETURN_TOKEN('{');
 }
 
 
 <ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
        yy_push_state(ST_LOOKING_FOR_VARNAME);
-       return T_DOLLAR_OPEN_CURLY_BRACES;
+       RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
 }
 
 
@@ -1578,7 +1601,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
        if (!zend_stack_is_empty(&SCNG(state_stack))) {
                yy_pop_state();
        }
-       return '}';
+       RETURN_TOKEN('}');
 }
 
 
@@ -1587,7 +1610,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
        zend_copy_value(zendlval, yytext, yyleng);
        yy_pop_state();
        yy_push_state(ST_IN_SCRIPTING);
-       return T_STRING_VARNAME;
+       RETURN_TOKEN(T_STRING_VARNAME);
 }
 
 
@@ -1617,12 +1640,12 @@ NEWLINE ("\r"|"\n"|"\r\n")
                        ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
                        ZEND_ASSERT(!errno && end == yytext + yyleng);
                }
-               return T_LNUMBER;
+               RETURN_TOKEN(T_LNUMBER);
        } else {
                ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
                /* errno isn't checked since we allow HUGE_VAL/INF overflow */
                ZEND_ASSERT(end == yytext + yyleng);
-               return T_DNUMBER;
+               RETURN_TOKEN(T_DNUMBER);
        }
 }
 
@@ -1636,7 +1659,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
                 */
                if (end != yytext + yyleng) {
                        zend_throw_exception(zend_get_parse_exception(), "Invalid numeric literal", E_PARSE);
-                       return T_ERROR;
+                       RETURN_TOKEN(T_ERROR);
                }
        } else {
                errno = 0;
@@ -1653,19 +1676,19 @@ NEWLINE ("\r"|"\n"|"\r\n")
                        if (end != yytext + yyleng) {
                                zend_throw_exception(zend_get_parse_exception(),
                                        "Invalid numeric literal", E_PARSE);
-                               return T_ERROR;
+                               RETURN_TOKEN(T_ERROR);
                        }
                        ZEND_ASSERT(!errno);
-                       return T_DNUMBER;
+                       RETURN_TOKEN(T_DNUMBER);
                }
                /* Also not an assert for the same reason */
                if (end != yytext + yyleng) {
                        zend_throw_exception(zend_get_parse_exception(), "Invalid numeric literal", E_PARSE);
-                       return T_ERROR;
+                       RETURN_TOKEN(T_ERROR);
                }
        }
        ZEND_ASSERT(!errno);
-       return T_LNUMBER;
+       RETURN_TOKEN(T_LNUMBER);
 }
 
 <ST_IN_SCRIPTING>{HNUM} {
@@ -1687,12 +1710,12 @@ NEWLINE ("\r"|"\n"|"\r\n")
                        ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16));
                        ZEND_ASSERT(!errno && end == hex + len);
                }
-               return T_LNUMBER;
+               RETURN_TOKEN(T_LNUMBER);
        } else {
                ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end));
                /* errno isn't checked since we allow HUGE_VAL/INF overflow */
                ZEND_ASSERT(end == hex + len);
-               return T_DNUMBER;
+               RETURN_TOKEN(T_DNUMBER);
        }
 }
 
@@ -1709,12 +1732,12 @@ NEWLINE ("\r"|"\n"|"\r\n")
 string:
                ZVAL_STRINGL(zendlval, yytext, yyleng);
        }
-       return T_NUM_STRING;
+       RETURN_TOKEN(T_NUM_STRING);
 }
 
 <ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
        ZVAL_STRINGL(zendlval, yytext, yyleng);
-       return T_NUM_STRING;
+       RETURN_TOKEN(T_NUM_STRING);
 }
 
 <ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
@@ -1723,59 +1746,59 @@ string:
        ZVAL_DOUBLE(zendlval, zend_strtod(yytext, &end));
        /* errno isn't checked since we allow HUGE_VAL/INF overflow */
        ZEND_ASSERT(end == yytext + yyleng);
-       return T_DNUMBER;
+       RETURN_TOKEN(T_DNUMBER);
 }
 
 <ST_IN_SCRIPTING>"__CLASS__" {
-       return T_CLASS_C;
+       RETURN_TOKEN(T_CLASS_C);
 }
 
 <ST_IN_SCRIPTING>"__TRAIT__" {
-       return T_TRAIT_C;
+       RETURN_TOKEN(T_TRAIT_C);
 }
 
 <ST_IN_SCRIPTING>"__FUNCTION__" {
-       return T_FUNC_C;
+       RETURN_TOKEN(T_FUNC_C);
 }
 
 <ST_IN_SCRIPTING>"__METHOD__" {
-       return T_METHOD_C;
+       RETURN_TOKEN(T_METHOD_C);
 }
 
 <ST_IN_SCRIPTING>"__LINE__" {
-       return T_LINE;
+       RETURN_TOKEN(T_LINE);
 }
 
 <ST_IN_SCRIPTING>"__FILE__" {
-       return T_FILE;
+       RETURN_TOKEN(T_FILE);
 }
 
 <ST_IN_SCRIPTING>"__DIR__" {
-       return T_DIR;
+       RETURN_TOKEN(T_DIR);
 }
 
 <ST_IN_SCRIPTING>"__NAMESPACE__" {
-       return T_NS_C;
+       RETURN_TOKEN(T_NS_C);
 }
 
 
 <INITIAL>"<?=" {
        BEGIN(ST_IN_SCRIPTING);
-       return T_OPEN_TAG_WITH_ECHO;
+       RETURN_TOKEN(T_OPEN_TAG_WITH_ECHO);
 }
 
 
 <INITIAL>"<?php"([ \t]|{NEWLINE}) {
        HANDLE_NEWLINE(yytext[yyleng-1]);
        BEGIN(ST_IN_SCRIPTING);
-       return T_OPEN_TAG;
+       RETURN_TOKEN(T_OPEN_TAG);
 }
 
 
 <INITIAL>"<?" {
        if (CG(short_tags)) {
                BEGIN(ST_IN_SCRIPTING);
-               return T_OPEN_TAG;
+               RETURN_TOKEN(T_OPEN_TAG);
        } else {
                goto inline_char_handler;
        }
@@ -1783,7 +1806,7 @@ string:
 
 <INITIAL>{ANY_CHAR} {
        if (YYCURSOR > YYLIMIT) {
-               return 0;
+               RETURN_TOKEN(END);
        }
 
 inline_char_handler:
@@ -1823,7 +1846,7 @@ inline_char_handler:
          ZVAL_STRINGL(zendlval, yytext, yyleng);
        }
        HANDLE_NEWLINES(yytext, yyleng);
-       return T_INLINE_HTML;
+       RETURN_TOKEN(T_INLINE_HTML);
 }
 
 
@@ -1834,7 +1857,7 @@ inline_char_handler:
        yyless(yyleng - 3);
        yy_push_state(ST_LOOKING_FOR_PROPERTY);
        zend_copy_value(zendlval, (yytext+1), (yyleng-1));
-       return T_VARIABLE;
+       RETURN_TOKEN(T_VARIABLE);
 }
 
 /* A [ always designates a variable offset, regardless of what follows
@@ -1843,22 +1866,22 @@ inline_char_handler:
        yyless(yyleng - 1);
        yy_push_state(ST_VAR_OFFSET);
        zend_copy_value(zendlval, (yytext+1), (yyleng-1));
-       return T_VARIABLE;
+       RETURN_TOKEN(T_VARIABLE);
 }
 
 <ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
        zend_copy_value(zendlval, (yytext+1), (yyleng-1));
-       return T_VARIABLE;
+       RETURN_TOKEN(T_VARIABLE);
 }
 
 <ST_VAR_OFFSET>"]" {
        yy_pop_state();
-       return ']';
+       RETURN_TOKEN(']');
 }
 
 <ST_VAR_OFFSET>{TOKENS}|[{}"`] {
        /* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */
-       return yytext[0];
+       RETURN_TOKEN(yytext[0]);
 }
 
 <ST_VAR_OFFSET>[ \n\r\t\\'#] {
@@ -1866,16 +1889,16 @@ inline_char_handler:
        yyless(0);
        yy_pop_state();
        ZVAL_NULL(zendlval);
-       return T_ENCAPSED_AND_WHITESPACE;
+       RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
 }
 
 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
        zend_copy_value(zendlval, yytext, yyleng);
-       return T_STRING;
+       RETURN_TOKEN(T_STRING);
 }
 
 
-<ST_IN_SCRIPTING,ST_LOOKING_FOR_SEMI_RESERVED_NAME>"#"|"//" {
+<ST_IN_SCRIPTING>"#"|"//" {
        while (YYCURSOR < YYLIMIT) {
                switch (*YYCURSOR++) {
                        case '\r':
@@ -1901,10 +1924,10 @@ inline_char_handler:
 
        yyleng = YYCURSOR - SCNG(yy_text);
 
-       return T_COMMENT;
+       RETURN_TOKEN(T_COMMENT);
 }
 
-<ST_IN_SCRIPTING,ST_LOOKING_FOR_SEMI_RESERVED_NAME>"/*"|"/**"{WHITESPACE} {
+<ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
        int doc_com;
 
        if (yyleng > 2) {
@@ -1931,27 +1954,15 @@ inline_char_handler:
 
        if (doc_com) {
                CG(doc_comment) = zend_string_init(yytext, yyleng, 0);
-               return T_DOC_COMMENT;
+               RETURN_TOKEN(T_DOC_COMMENT);
        }
 
-       return T_COMMENT;
-}
-
-<ST_LOOKING_FOR_SEMI_RESERVED_NAME>{LABEL} {
-    zend_copy_value(zendlval, yytext, yyleng);
-    yy_pop_state();
-    return T_STRING;
-}
-
-<ST_LOOKING_FOR_SEMI_RESERVED_NAME>{ANY_CHAR} {
-    yyless(0);
-    yy_pop_state();
-    goto restart;
+       RETURN_TOKEN(T_COMMENT);
 }
 
 <ST_IN_SCRIPTING>"?>"{NEWLINE}? {
        BEGIN(INITIAL);
-       return T_CLOSE_TAG;  /* implicit ';' at php-end tag */
+       RETURN_TOKEN(T_CLOSE_TAG);  /* implicit ';' at php-end tag */
 }
 
 
@@ -1977,7 +1988,7 @@ inline_char_handler:
                         * for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
                         * rule, which continued in ST_IN_SCRIPTING state after the quote */
                        ZVAL_NULL(zendlval);
-                       return T_ENCAPSED_AND_WHITESPACE;
+                       RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
                }
        }
 
@@ -2020,7 +2031,7 @@ inline_char_handler:
                SCNG(output_filter)((unsigned char **)&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
                ZVAL_STRINGL(zendlval, str, sz);
        }
-       return T_CONSTANT_ENCAPSED_STRING;
+       RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
 }
 
 
@@ -2032,9 +2043,9 @@ inline_char_handler:
                        case '"':
                                yyleng = YYCURSOR - SCNG(yy_text);
                                if (zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"') == FAILURE) {
-                                       return T_ERROR;
+                                       RETURN_TOKEN(T_ERROR);
                                }
-                               return T_CONSTANT_ENCAPSED_STRING;
+                               RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
                        case '$':
                                if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
                                        break;
@@ -2064,7 +2075,7 @@ inline_char_handler:
        YYCURSOR = SCNG(yy_text) + yyleng;
 
        BEGIN(ST_DOUBLE_QUOTES);
-       return '"';
+       RETURN_TOKEN('"');
 }
 
 
@@ -2112,13 +2123,13 @@ inline_char_handler:
 
        zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
 
-       return T_START_HEREDOC;
+       RETURN_TOKEN(T_START_HEREDOC);
 }
 
 
 <ST_IN_SCRIPTING>[`] {
        BEGIN(ST_BACKQUOTE);
-       return '`';
+       RETURN_TOKEN('`');
 }
 
 
@@ -2132,7 +2143,7 @@ inline_char_handler:
        efree(heredoc_label);
 
        BEGIN(ST_IN_SCRIPTING);
-       return T_END_HEREDOC;
+       RETURN_TOKEN(T_END_HEREDOC);
 }
 
 
@@ -2140,18 +2151,18 @@ inline_char_handler:
        Z_LVAL_P(zendlval) = (zend_long) '{';
        yy_push_state(ST_IN_SCRIPTING);
        yyless(1);
-       return T_CURLY_OPEN;
+       RETURN_TOKEN(T_CURLY_OPEN);
 }
 
 
 <ST_DOUBLE_QUOTES>["] {
        BEGIN(ST_IN_SCRIPTING);
-       return '"';
+       RETURN_TOKEN('"');
 }
 
 <ST_BACKQUOTE>[`] {
        BEGIN(ST_IN_SCRIPTING);
-       return '`';
+       RETURN_TOKEN('`');
 }
 
 
@@ -2164,7 +2175,7 @@ inline_char_handler:
        }
 
        if (YYCURSOR > YYLIMIT) {
-               return 0;
+               RETURN_TOKEN(END);
        }
        if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
                YYCURSOR++;
@@ -2201,15 +2212,15 @@ double_quotes_scan_done:
        yyleng = YYCURSOR - SCNG(yy_text);
 
        if (zend_scan_escape_string(zendlval, yytext, yyleng, '"') == FAILURE) {
-               return T_ERROR;
+               RETURN_TOKEN(T_ERROR);
        }
-       return T_ENCAPSED_AND_WHITESPACE;
+       RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
 }
 
 
 <ST_BACKQUOTE>{ANY_CHAR} {
        if (YYCURSOR > YYLIMIT) {
-               return 0;
+               RETURN_TOKEN(END);
        }
        if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
                YYCURSOR++;
@@ -2245,9 +2256,9 @@ double_quotes_scan_done:
        yyleng = YYCURSOR - SCNG(yy_text);
 
        if (zend_scan_escape_string(zendlval, yytext, yyleng, '`') == FAILURE) {
-               return T_ERROR;
+               RETURN_TOKEN(T_ERROR);
        }
-       return T_ENCAPSED_AND_WHITESPACE;
+       RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
 }
 
 
@@ -2257,7 +2268,7 @@ double_quotes_scan_done:
        zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
 
        if (YYCURSOR > YYLIMIT) {
-               return 0;
+               RETURN_TOKEN(END);
        }
 
        YYCURSOR--;
@@ -2321,9 +2332,9 @@ heredoc_scan_done:
        yyleng = YYCURSOR - SCNG(yy_text);
 
        if (zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0) == FAILURE) {
-               return T_ERROR;
+               RETURN_TOKEN(T_ERROR);
        }
-       return T_ENCAPSED_AND_WHITESPACE;
+       RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
 }
 
 
@@ -2333,7 +2344,7 @@ heredoc_scan_done:
        zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
 
        if (YYCURSOR > YYLIMIT) {
-               return 0;
+               RETURN_TOKEN(END);
        }
 
        YYCURSOR--;
@@ -2380,13 +2391,13 @@ nowdoc_scan_done:
 
        zend_copy_value(zendlval, yytext, yyleng - newline);
        HANDLE_NEWLINES(yytext, yyleng - newline);
-       return T_ENCAPSED_AND_WHITESPACE;
+       RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
 }
 
 
 <ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
        if (YYCURSOR > YYLIMIT) {
-               return 0;
+               RETURN_TOKEN(END);
        }
 
        zend_error(E_COMPILE_WARNING,"Unexpected character in input:  '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
diff --git a/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_000.phpt b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_000.phpt
new file mode 100644 (file)
index 0000000..03b991b
--- /dev/null
@@ -0,0 +1,19 @@
+--TEST--
+Parse errors during token_get_all() with TOKEN_PARSE flag
+--SKIPIF--
+<?php if (!extension_loaded("tokenizer")) print "skip"; ?>
+--FILE--
+<?php
+
+try {
+    token_get_all('<?php invalid code;', TOKEN_PARSE);
+} catch (ParseException $e) {
+    echo $e->getMessage(), PHP_EOL;
+}
+
+echo "Done";
+
+?>
+--EXPECT--
+syntax error, unexpected 'code' (T_STRING)
+Done
diff --git a/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_001.phpt b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_001.phpt
new file mode 100644 (file)
index 0000000..ab33435
--- /dev/null
@@ -0,0 +1,81 @@
+--TEST--
+Semi reserved words support: member access
+--SKIPIF--
+<?php if (!extension_loaded("tokenizer")) print "skip"; ?>
+--FILE--
+<?php
+$tokens = token_get_all('<?php
+X::continue;
+X::$continue;
+$x->$continue;
+X::continue();
+$x->continue();
+X::class;
+
+class X {
+    const CONTINUE = 1;
+    public $x = self::CONTINUE + 1;
+}
+', TOKEN_PARSE); // sample uses the keyword "continue" as constant, property and method name
+// Dump the stream: array tokens as "L<line>: <name> <text>" (whitespace skipped), string tokens as-is.
+array_walk($tokens, function($tk) {
+  if(is_array($tk)) { // array token: [0] = id, [1] = text, [2] = line
+    if(($t = token_name($tk[0])) == 'T_WHITESPACE') return; // whitespace tokens are not printed
+    echo "L{$tk[2]}: ".$t." {$tk[1]}", PHP_EOL;
+  }
+  else echo $tk, PHP_EOL; // non-array token: printed verbatim
+});
+
+echo "Done";
+
+?>
+--EXPECTF--
+L1: T_OPEN_TAG <?php
+
+L2: T_STRING X
+L2: T_DOUBLE_COLON ::
+L2: T_STRING continue
+;
+L3: T_STRING X
+L3: T_DOUBLE_COLON ::
+L3: T_VARIABLE $continue
+;
+L4: T_VARIABLE $x
+L4: T_OBJECT_OPERATOR ->
+L4: T_VARIABLE $continue
+;
+L5: T_STRING X
+L5: T_DOUBLE_COLON ::
+L5: T_STRING continue
+(
+)
+;
+L6: T_VARIABLE $x
+L6: T_OBJECT_OPERATOR ->
+L6: T_STRING continue
+(
+)
+;
+L7: T_STRING X
+L7: T_DOUBLE_COLON ::
+L7: T_CLASS class
+;
+L9: T_CLASS class
+L9: T_STRING X
+{
+L10: T_CONST const
+L10: T_STRING CONTINUE
+=
+L10: T_LNUMBER 1
+;
+L11: T_PUBLIC public
+L11: T_VARIABLE $x
+=
+L11: T_STRING self
+L11: T_DOUBLE_COLON ::
+L11: T_STRING CONTINUE
++
+L11: T_LNUMBER 1
+;
+}
+Done
diff --git a/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_002.phpt b/ext/tokenizer/tests/token_get_all_TOKEN_PARSE_002.phpt
new file mode 100644 (file)
index 0000000..3dd8e14
--- /dev/null
@@ -0,0 +1,68 @@
+--TEST--
+Semi reserved words support: class const
+--SKIPIF--
+<?php if (!extension_loaded("tokenizer")) print "skip"; ?>
+--FILE--
+<?php
+$tokens = token_get_all('<?php
+  class SomeClass {
+      const CONST = 1;
+      const CONTINUE = (self::CONST + 1);
+      const ARRAY = [1, self::CONTINUE => [3, 4], 5];
+  }
+', TOKEN_PARSE); // sample declares class constants named after keywords (CONST, CONTINUE, ARRAY)
+// Dump the stream: array tokens as "L<line>: <name> <text>" (whitespace skipped), string tokens as-is.
+array_walk($tokens, function($tk) {
+  if(is_array($tk)) { // array token: [0] = id, [1] = text, [2] = line
+    if(($t = token_name($tk[0])) == 'T_WHITESPACE') return; // whitespace tokens are not printed
+    echo "L{$tk[2]}: ".$t." {$tk[1]}", PHP_EOL;
+  }
+  else echo $tk, PHP_EOL; // non-array token: printed verbatim
+});
+
+echo "Done";
+
+?>
+--EXPECTF--
+L1: T_OPEN_TAG <?php
+
+L2: T_CLASS class
+L2: T_STRING SomeClass
+{
+L3: T_CONST const
+L3: T_STRING CONST
+=
+L3: T_LNUMBER 1
+;
+L4: T_CONST const
+L4: T_STRING CONTINUE
+=
+(
+L4: T_STRING self
+L4: T_DOUBLE_COLON ::
+L4: T_STRING CONST
++
+L4: T_LNUMBER 1
+)
+;
+L5: T_CONST const
+L5: T_STRING ARRAY
+=
+[
+L5: T_LNUMBER 1
+,
+L5: T_STRING self
+L5: T_DOUBLE_COLON ::
+L5: T_STRING CONTINUE
+L5: T_DOUBLE_ARROW =>
+[
+L5: T_LNUMBER 3
+,
+L5: T_LNUMBER 4
+]
+,
+L5: T_LNUMBER 5
+]
+;
+}
+Done
index 29e97c38c4071f3aabbec85b8090fd69d845eef2..9ded0a177425f4ac3b8b35b4a6aaa2c8a5b94af5 100644 (file)
@@ -19,7 +19,7 @@ var_dump( token_get_all());
 echo "-- Testing token_get_all() function with more than expected no. of arguments --\n";
 $source = '<?php ?>';
 $extra_arg = 10;
-var_dump( token_get_all($source, $extra_arg));
+var_dump( token_get_all($source, true, $extra_arg));
 
 echo "Done"
 ?>
@@ -28,10 +28,10 @@ echo "Done"
 
 -- Testing token_get_all() function with zero arguments --
 
-Warning: token_get_all() expects exactly 1 parameter, 0 given in %s on line %d
+Warning: token_get_all() expects at least 1 parameter, 0 given in %s on line 11
 NULL
 -- Testing token_get_all() function with more than expected no. of arguments --
 
-Warning: token_get_all() expects exactly 1 parameter, 2 given in %s on line %d
+Warning: token_get_all() expects at most 2 parameters, 3 given in %s on line 17
 NULL
-Done
+Done
\ No newline at end of file
index c4b9d14359fd91f6d079a13629f8c79fcd83e145..2a4fa90ca27984931eb149eaafab381750cab627 100644 (file)
 #define zendcursor LANG_SCNG(yy_cursor)
 #define zendlimit  LANG_SCNG(yy_limit)
 
+#define TOKEN_PARSE                            1
+/* Registers the TOKEN_PARSE flag of token_get_all() as a PHP constant (CONST_CS|CONST_PERSISTENT); invoked from PHP_MINIT. */
+void tokenizer_token_get_all_register_constants(INIT_FUNC_ARGS) {
+       REGISTER_LONG_CONSTANT("TOKEN_PARSE", TOKEN_PARSE, CONST_CS|CONST_PERSISTENT);
+}
+
 /* {{{ arginfo */
 ZEND_BEGIN_ARG_INFO_EX(arginfo_token_get_all, 0, 0, 1)
        ZEND_ARG_INFO(0, source)
@@ -83,6 +89,7 @@ ZEND_GET_MODULE(tokenizer)
 PHP_MINIT_FUNCTION(tokenizer)
 {
        tokenizer_register_constants(INIT_FUNC_ARGS_PASSTHRU);
+       tokenizer_token_get_all_register_constants(INIT_FUNC_ARGS_PASSTHRU);
        return SUCCESS;
 }
 /* }}} */
@@ -97,8 +104,10 @@ PHP_MINFO_FUNCTION(tokenizer)
 }
 /* }}} */
 
-static void tokenize(zval *return_value)
+static zend_bool tokenize(zval *return_value, zend_string *source)
 {
+       zval source_zval;
+       zend_lex_state original_lex_state;
        zval token;
        zval keyword;
        int token_type;
@@ -106,10 +115,22 @@ static void tokenize(zval *return_value)
        int token_line = 1;
        int need_tokens = -1; /* for __halt_compiler lexing. -1 = disabled */
 
+       ZVAL_STR_COPY(&source_zval, source);
+       zend_save_lexical_state(&original_lex_state);
+
+       if (zend_prepare_string_for_scanning(&source_zval, "") == FAILURE) {
+               zend_restore_lexical_state(&original_lex_state);
+               return 0;
+       }
+
+       LANG_SCNG(yy_state) = yycINITIAL;
        array_init(return_value);
 
        ZVAL_NULL(&token);
        while ((token_type = lex_scan(&token))) {
+
+               if(token_type == T_ERROR) break;
+
                destroy = 1;
                switch (token_type) {
                        case T_CLOSE_TAG:
@@ -123,8 +144,6 @@ static void tokenize(zval *return_value)
                        case T_DOC_COMMENT:
                                destroy = 0;
                                break;
-                       case T_ERROR:
-                               return;
                }
 
                if (token_type >= 256) {
@@ -169,34 +188,113 @@ static void tokenize(zval *return_value)
 
                token_line = CG(zend_lineno);
        }
+
+       zval_dtor(&source_zval);
+       zend_restore_lexical_state(&original_lex_state);
+
+       return 1;
 }
 
-/* {{{ proto array token_get_all(string source)
- */
-PHP_FUNCTION(token_get_all)
+zval token_stream; /* tokens collected by on_event(); NOTE(review): file-scope state shared by every call - confirm safe for nested calls / ZTS */
+/* Scanner event hook installed by tokenize_parse(): records the reported events into token_stream. */
+void on_event(zend_php_scanner_event event, int token, int line)
 {
-       zend_string *source;
-       zval source_zval;
-       zend_lex_state original_lex_state;
+       zval keyword;
+       HashTable *tokens_ht;
+       zval *token_zv;
 
-       if (zend_parse_parameters(ZEND_NUM_ARGS(), "S", &source) == FAILURE) {
-               return;
+       switch(event) {
+               case ON_TOKEN: /* append the reported token to token_stream */
+                       if (token == T_ERROR || token == END) break; /* these two ids are never recorded */
+                       if (token >= 256) { /* stored as array(id, text, line) */
+                               array_init(&keyword);
+                               add_next_index_long(&keyword, token);
+                               add_next_index_stringl(&keyword, (char *)LANG_SCNG(yy_text), LANG_SCNG(yy_leng));
+                               add_next_index_long(&keyword, line);
+                               add_next_index_zval(&token_stream, &keyword);
+                       } else { /* ids below 256 are stored as the bare scanned text */
+                               add_next_index_stringl(&token_stream, (char *)LANG_SCNG(yy_text), LANG_SCNG(yy_leng));
+                       }
+                       break;
+               case ON_FEEDBACK: /* overwrite the id of the most recently recorded token */
+                       tokens_ht = Z_ARRVAL(token_stream);
+                       token_zv = zend_hash_index_find(tokens_ht, zend_hash_num_elements(tokens_ht) - 1);
+                       if (token_zv && Z_TYPE_P(token_zv) == IS_ARRAY) { /* bare-string entries carry no id and are left alone */
+                               ZVAL_LONG(zend_hash_index_find(Z_ARRVAL_P(token_zv), 0), token);
+                       }
+                       break;
+               case ON_STOP: /* raised by zend_stop_lexing() before it moves yy_cursor to yy_limit */
+                       if (LANG_SCNG(yy_cursor) != LANG_SCNG(yy_limit)) { /* bytes remain past the cursor: record them as one T_INLINE_HTML entry */
+                               array_init(&keyword);
+                               add_next_index_long(&keyword, T_INLINE_HTML);
+                               add_next_index_stringl(&keyword,
+                                       (char *)LANG_SCNG(yy_cursor), LANG_SCNG(yy_limit) - LANG_SCNG(yy_cursor));
+                               add_next_index_long(&keyword, CG(zend_lineno));
+                               add_next_index_zval(&token_stream, &keyword);
+                       }
+                       break;
         }
+}
+/* Tokenizes source by running zendparse() with on_event() installed; returns nonzero and fills return_value on success, zero otherwise. */
+static zend_bool tokenize_parse(zval *return_value, zend_string *source)
+{
+       zval source_zval;
+       zend_lex_state original_lex_state;
+       zend_bool original_in_compilation;
+       zend_bool success;
 
        ZVAL_STR_COPY(&source_zval, source);
+
+       original_in_compilation = CG(in_compilation);
+       CG(in_compilation) = 1; /* put back from original_in_compilation before returning */
        zend_save_lexical_state(&original_lex_state);
 
-       if (zend_prepare_string_for_scanning(&source_zval, "") == FAILURE) {
-               zend_restore_lexical_state(&original_lex_state);
-               RETURN_FALSE;
-       }
+       if ((success = (zend_prepare_string_for_scanning(&source_zval, "") == SUCCESS))) {
+               CG(ast) = NULL; /* NOTE(review): prior CG(ast)/CG(ast_arena) values are overwritten and not restored here - confirm none can be live */
+               CG(ast_arena) = zend_arena_create(1024 * 32);
+               LANG_SCNG(yy_state) = yycINITIAL;
+               LANG_SCNG(on_event) = on_event; /* scanner events now feed token_stream */
 
-       LANG_SCNG(yy_state) = yycINITIAL;
+               array_init(&token_stream);
+               if((success = (zendparse() == SUCCESS))) {
+                       ZVAL_ZVAL(return_value, &token_stream, 1, 0); /* hand the collected tokens to the caller (copy=1, dtor=0) */
+               }
+               zval_dtor(&token_stream); /* runs on both the success and the failure path */
 
-       tokenize(return_value);
+               zend_ast_destroy(CG(ast));
+               zend_arena_destroy(CG(ast_arena));
+       }
 
+       /* restore compiler and scanner global states */
        zend_restore_lexical_state(&original_lex_state);
+       CG(in_compilation) = original_in_compilation;
+
        zval_dtor(&source_zval);
+
+       return success;
+}
+
+/* }}} */
+
+/* {{{ proto array token_get_all(string source [, int flags])
+   Returns the tokens of source as an array, or false on failure; TOKEN_PARSE in flags selects the parser-driven tokenizer */
+PHP_FUNCTION(token_get_all)
+{
+       zend_string *source;
+       zend_long flags = 0;
+       zend_bool success;
+
+       if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|l", &source, &flags) == FAILURE) { /* "S|l": required string, optional integer flags */
+               return;
+       }
+
+       if (flags & TOKEN_PARSE) {
+               success = tokenize_parse(return_value, source);
+       } else {
+               success = tokenize(return_value, source);
+       }
+
+       if (!success) RETURN_FALSE; /* either helper reports failure through its return value */
+}
 /* }}} */