zend_stack_destroy(&SCNG(state_stack));
zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1);
zend_ptr_stack_destroy(&SCNG(heredoc_label_stack));
+ SCNG(on_event) = NULL;
}
ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)
lex_state->output_filter = SCNG(output_filter);
lex_state->script_encoding = SCNG(script_encoding);
+ lex_state->on_event = SCNG(on_event);
+
lex_state->ast = CG(ast);
lex_state->ast_arena = CG(ast_arena);
}
SCNG(output_filter) = lex_state->output_filter;
SCNG(script_encoding) = lex_state->script_encoding;
+ SCNG(on_event) = lex_state->on_event;
+
CG(ast) = lex_state->ast;
CG(ast_arena) = lex_state->ast_arena;
}
}
+ZEND_API void zend_lex_tstring(zval *zv)
+{
+ if (SCNG(on_event)) SCNG(on_event)(ON_FEEDBACK, T_STRING, 0);
+
+ ZVAL_STRINGL(zv, (char*)SCNG(yy_text), SCNG(yy_leng));
+}
+
#define BOM_UTF32_BE "\x00\x00\xfe\xff"
#define BOM_UTF32_LE "\xff\xfe\x00\x00"
#define BOM_UTF16_BE "\xfe\xff"
return SUCCESS;
}
+static zend_always_inline int emit_token(int token, int token_line)
+{
+ if(SCNG(on_event)) SCNG(on_event)(ON_TOKEN, token, token_line);
+
+ return token;
+}
+
+#define RETURN_TOKEN(token) return emit_token(token, start_line);
int lex_scan(zval *zendlval)
{
+
+int start_line = CG(zend_lineno);
+
restart:
SCNG(yy_text) = YYCURSOR;
<!*> := yyleng = YYCURSOR - SCNG(yy_text);
<ST_IN_SCRIPTING>"exit" {
- return T_EXIT;
+ RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"die" {
- return T_EXIT;
+ RETURN_TOKEN(T_EXIT);
}
<ST_IN_SCRIPTING>"function" {
- return T_FUNCTION;
+ RETURN_TOKEN(T_FUNCTION);
}
<ST_IN_SCRIPTING>"const" {
- return T_CONST;
+ RETURN_TOKEN(T_CONST);
}
<ST_IN_SCRIPTING>"return" {
- return T_RETURN;
+ RETURN_TOKEN(T_RETURN);
}
<ST_IN_SCRIPTING>"yield"{WHITESPACE}"from" {
- return T_YIELD_FROM;
+ RETURN_TOKEN(T_YIELD_FROM);
}
<ST_IN_SCRIPTING>"yield" {
- return T_YIELD;
+ RETURN_TOKEN(T_YIELD);
}
<ST_IN_SCRIPTING>"try" {
- return T_TRY;
+ RETURN_TOKEN(T_TRY);
}
<ST_IN_SCRIPTING>"catch" {
- return T_CATCH;
+ RETURN_TOKEN(T_CATCH);
}
<ST_IN_SCRIPTING>"finally" {
- return T_FINALLY;
+ RETURN_TOKEN(T_FINALLY);
}
<ST_IN_SCRIPTING>"throw" {
- return T_THROW;
+ RETURN_TOKEN(T_THROW);
}
<ST_IN_SCRIPTING>"if" {
- return T_IF;
+ RETURN_TOKEN(T_IF);
}
<ST_IN_SCRIPTING>"elseif" {
- return T_ELSEIF;
+ RETURN_TOKEN(T_ELSEIF);
}
<ST_IN_SCRIPTING>"endif" {
- return T_ENDIF;
+ RETURN_TOKEN(T_ENDIF);
}
<ST_IN_SCRIPTING>"else" {
- return T_ELSE;
+ RETURN_TOKEN(T_ELSE);
}
<ST_IN_SCRIPTING>"while" {
- return T_WHILE;
+ RETURN_TOKEN(T_WHILE);
}
<ST_IN_SCRIPTING>"endwhile" {
- return T_ENDWHILE;
+ RETURN_TOKEN(T_ENDWHILE);
}
<ST_IN_SCRIPTING>"do" {
- return T_DO;
+ RETURN_TOKEN(T_DO);
}
<ST_IN_SCRIPTING>"for" {
- return T_FOR;
+ RETURN_TOKEN(T_FOR);
}
<ST_IN_SCRIPTING>"endfor" {
- return T_ENDFOR;
+ RETURN_TOKEN(T_ENDFOR);
}
<ST_IN_SCRIPTING>"foreach" {
- return T_FOREACH;
+ RETURN_TOKEN(T_FOREACH);
}
<ST_IN_SCRIPTING>"endforeach" {
- return T_ENDFOREACH;
+ RETURN_TOKEN(T_ENDFOREACH);
}
<ST_IN_SCRIPTING>"declare" {
- return T_DECLARE;
+ RETURN_TOKEN(T_DECLARE);
}
<ST_IN_SCRIPTING>"enddeclare" {
- return T_ENDDECLARE;
+ RETURN_TOKEN(T_ENDDECLARE);
}
<ST_IN_SCRIPTING>"instanceof" {
- return T_INSTANCEOF;
+ RETURN_TOKEN(T_INSTANCEOF);
}
<ST_IN_SCRIPTING>"as" {
- return T_AS;
+ RETURN_TOKEN(T_AS);
}
<ST_IN_SCRIPTING>"switch" {
- return T_SWITCH;
+ RETURN_TOKEN(T_SWITCH);
}
<ST_IN_SCRIPTING>"endswitch" {
- return T_ENDSWITCH;
+ RETURN_TOKEN(T_ENDSWITCH);
}
<ST_IN_SCRIPTING>"case" {
- return T_CASE;
+ RETURN_TOKEN(T_CASE);
}
<ST_IN_SCRIPTING>"default" {
- return T_DEFAULT;
+ RETURN_TOKEN(T_DEFAULT);
}
<ST_IN_SCRIPTING>"break" {
- return T_BREAK;
+ RETURN_TOKEN(T_BREAK);
}
<ST_IN_SCRIPTING>"continue" {
- return T_CONTINUE;
+ RETURN_TOKEN(T_CONTINUE);
}
<ST_IN_SCRIPTING>"goto" {
- return T_GOTO;
+ RETURN_TOKEN(T_GOTO);
}
<ST_IN_SCRIPTING>"echo" {
- return T_ECHO;
+ RETURN_TOKEN(T_ECHO);
}
<ST_IN_SCRIPTING>"print" {
- return T_PRINT;
+ RETURN_TOKEN(T_PRINT);
}
<ST_IN_SCRIPTING>"class" {
- return T_CLASS;
+ RETURN_TOKEN(T_CLASS);
}
<ST_IN_SCRIPTING>"interface" {
- return T_INTERFACE;
+ RETURN_TOKEN(T_INTERFACE);
}
<ST_IN_SCRIPTING>"trait" {
- return T_TRAIT;
+ RETURN_TOKEN(T_TRAIT);
}
<ST_IN_SCRIPTING>"extends" {
- return T_EXTENDS;
+ RETURN_TOKEN(T_EXTENDS);
}
<ST_IN_SCRIPTING>"implements" {
- return T_IMPLEMENTS;
+ RETURN_TOKEN(T_IMPLEMENTS);
}
<ST_IN_SCRIPTING>"->" {
yy_push_state(ST_LOOKING_FOR_PROPERTY);
- return T_OBJECT_OPERATOR;
+ RETURN_TOKEN(T_OBJECT_OPERATOR);
}
-<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY,ST_LOOKING_FOR_SEMI_RESERVED_NAME>{WHITESPACE}+ {
+<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
HANDLE_NEWLINES(yytext, yyleng);
- return T_WHITESPACE;
+ RETURN_TOKEN(T_WHITESPACE);
}
<ST_LOOKING_FOR_PROPERTY>"->" {
- return T_OBJECT_OPERATOR;
+ RETURN_TOKEN(T_OBJECT_OPERATOR);
}
<ST_LOOKING_FOR_PROPERTY>{LABEL} {
yy_pop_state();
zend_copy_value(zendlval, yytext, yyleng);
- return T_STRING;
+ RETURN_TOKEN(T_STRING);
}
<ST_LOOKING_FOR_PROPERTY>{ANY_CHAR} {
}
<ST_IN_SCRIPTING>"::" {
- return T_PAAMAYIM_NEKUDOTAYIM;
+ RETURN_TOKEN(T_PAAMAYIM_NEKUDOTAYIM);
}
<ST_IN_SCRIPTING>"\\" {
- return T_NS_SEPARATOR;
+ RETURN_TOKEN(T_NS_SEPARATOR);
}
<ST_IN_SCRIPTING>"..." {
- return T_ELLIPSIS;
+ RETURN_TOKEN(T_ELLIPSIS);
}
<ST_IN_SCRIPTING>"??" {
- return T_COALESCE;
+ RETURN_TOKEN(T_COALESCE);
}
<ST_IN_SCRIPTING>"new" {
- return T_NEW;
+ RETURN_TOKEN(T_NEW);
}
<ST_IN_SCRIPTING>"clone" {
- return T_CLONE;
+ RETURN_TOKEN(T_CLONE);
}
<ST_IN_SCRIPTING>"var" {
- return T_VAR;
+ RETURN_TOKEN(T_VAR);
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("int"|"integer"){TABS_AND_SPACES}")" {
- return T_INT_CAST;
+ RETURN_TOKEN(T_INT_CAST);
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("real"|"double"|"float"){TABS_AND_SPACES}")" {
- return T_DOUBLE_CAST;
+ RETURN_TOKEN(T_DOUBLE_CAST);
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("string"|"binary"){TABS_AND_SPACES}")" {
- return T_STRING_CAST;
+ RETURN_TOKEN(T_STRING_CAST);
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"array"{TABS_AND_SPACES}")" {
- return T_ARRAY_CAST;
+ RETURN_TOKEN(T_ARRAY_CAST);
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}"object"{TABS_AND_SPACES}")" {
- return T_OBJECT_CAST;
+ RETURN_TOKEN(T_OBJECT_CAST);
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("bool"|"boolean"){TABS_AND_SPACES}")" {
- return T_BOOL_CAST;
+ RETURN_TOKEN(T_BOOL_CAST);
}
<ST_IN_SCRIPTING>"("{TABS_AND_SPACES}("unset"){TABS_AND_SPACES}")" {
- return T_UNSET_CAST;
+ RETURN_TOKEN(T_UNSET_CAST);
}
<ST_IN_SCRIPTING>"eval" {
- return T_EVAL;
+ RETURN_TOKEN(T_EVAL);
}
<ST_IN_SCRIPTING>"include" {
- return T_INCLUDE;
+ RETURN_TOKEN(T_INCLUDE);
}
<ST_IN_SCRIPTING>"include_once" {
- return T_INCLUDE_ONCE;
+ RETURN_TOKEN(T_INCLUDE_ONCE);
}
<ST_IN_SCRIPTING>"require" {
- return T_REQUIRE;
+ RETURN_TOKEN(T_REQUIRE);
}
<ST_IN_SCRIPTING>"require_once" {
- return T_REQUIRE_ONCE;
+ RETURN_TOKEN(T_REQUIRE_ONCE);
}
<ST_IN_SCRIPTING>"namespace" {
- return T_NAMESPACE;
+ RETURN_TOKEN(T_NAMESPACE);
}
<ST_IN_SCRIPTING>"use" {
- return T_USE;
+ RETURN_TOKEN(T_USE);
}
<ST_IN_SCRIPTING>"insteadof" {
- return T_INSTEADOF;
+ RETURN_TOKEN(T_INSTEADOF);
}
<ST_IN_SCRIPTING>"global" {
- return T_GLOBAL;
+ RETURN_TOKEN(T_GLOBAL);
}
<ST_IN_SCRIPTING>"isset" {
- return T_ISSET;
+ RETURN_TOKEN(T_ISSET);
}
<ST_IN_SCRIPTING>"empty" {
- return T_EMPTY;
+ RETURN_TOKEN(T_EMPTY);
}
<ST_IN_SCRIPTING>"__halt_compiler" {
- return T_HALT_COMPILER;
+ RETURN_TOKEN(T_HALT_COMPILER);
}
<ST_IN_SCRIPTING>"static" {
- return T_STATIC;
+ RETURN_TOKEN(T_STATIC);
}
<ST_IN_SCRIPTING>"abstract" {
- return T_ABSTRACT;
+ RETURN_TOKEN(T_ABSTRACT);
}
<ST_IN_SCRIPTING>"final" {
- return T_FINAL;
+ RETURN_TOKEN(T_FINAL);
}
<ST_IN_SCRIPTING>"private" {
- return T_PRIVATE;
+ RETURN_TOKEN(T_PRIVATE);
}
<ST_IN_SCRIPTING>"protected" {
- return T_PROTECTED;
+ RETURN_TOKEN(T_PROTECTED);
}
<ST_IN_SCRIPTING>"public" {
- return T_PUBLIC;
+ RETURN_TOKEN(T_PUBLIC);
}
<ST_IN_SCRIPTING>"unset" {
- return T_UNSET;
+ RETURN_TOKEN(T_UNSET);
}
<ST_IN_SCRIPTING>"=>" {
- return T_DOUBLE_ARROW;
+ RETURN_TOKEN(T_DOUBLE_ARROW);
}
<ST_IN_SCRIPTING>"list" {
- return T_LIST;
+ RETURN_TOKEN(T_LIST);
}
<ST_IN_SCRIPTING>"array" {
- return T_ARRAY;
+ RETURN_TOKEN(T_ARRAY);
}
<ST_IN_SCRIPTING>"callable" {
- return T_CALLABLE;
+ RETURN_TOKEN(T_CALLABLE);
}
<ST_IN_SCRIPTING>"++" {
- return T_INC;
+ RETURN_TOKEN(T_INC);
}
<ST_IN_SCRIPTING>"--" {
- return T_DEC;
+ RETURN_TOKEN(T_DEC);
}
<ST_IN_SCRIPTING>"===" {
- return T_IS_IDENTICAL;
+ RETURN_TOKEN(T_IS_IDENTICAL);
}
<ST_IN_SCRIPTING>"!==" {
- return T_IS_NOT_IDENTICAL;
+ RETURN_TOKEN(T_IS_NOT_IDENTICAL);
}
<ST_IN_SCRIPTING>"==" {
- return T_IS_EQUAL;
+ RETURN_TOKEN(T_IS_EQUAL);
}
<ST_IN_SCRIPTING>"!="|"<>" {
- return T_IS_NOT_EQUAL;
+ RETURN_TOKEN(T_IS_NOT_EQUAL);
}
<ST_IN_SCRIPTING>"<=>" {
- return T_SPACESHIP;
+ RETURN_TOKEN(T_SPACESHIP);
}
<ST_IN_SCRIPTING>"<=" {
- return T_IS_SMALLER_OR_EQUAL;
+ RETURN_TOKEN(T_IS_SMALLER_OR_EQUAL);
}
<ST_IN_SCRIPTING>">=" {
- return T_IS_GREATER_OR_EQUAL;
+ RETURN_TOKEN(T_IS_GREATER_OR_EQUAL);
}
<ST_IN_SCRIPTING>"+=" {
- return T_PLUS_EQUAL;
+ RETURN_TOKEN(T_PLUS_EQUAL);
}
<ST_IN_SCRIPTING>"-=" {
- return T_MINUS_EQUAL;
+ RETURN_TOKEN(T_MINUS_EQUAL);
}
<ST_IN_SCRIPTING>"*=" {
- return T_MUL_EQUAL;
+ RETURN_TOKEN(T_MUL_EQUAL);
}
<ST_IN_SCRIPTING>"*\*" {
- return T_POW;
+ RETURN_TOKEN(T_POW);
}
<ST_IN_SCRIPTING>"*\*=" {
- return T_POW_EQUAL;
+ RETURN_TOKEN(T_POW_EQUAL);
}
<ST_IN_SCRIPTING>"/=" {
- return T_DIV_EQUAL;
+ RETURN_TOKEN(T_DIV_EQUAL);
}
<ST_IN_SCRIPTING>".=" {
- return T_CONCAT_EQUAL;
+ RETURN_TOKEN(T_CONCAT_EQUAL);
}
<ST_IN_SCRIPTING>"%=" {
- return T_MOD_EQUAL;
+ RETURN_TOKEN(T_MOD_EQUAL);
}
<ST_IN_SCRIPTING>"<<=" {
- return T_SL_EQUAL;
+ RETURN_TOKEN(T_SL_EQUAL);
}
<ST_IN_SCRIPTING>">>=" {
- return T_SR_EQUAL;
+ RETURN_TOKEN(T_SR_EQUAL);
}
<ST_IN_SCRIPTING>"&=" {
- return T_AND_EQUAL;
+ RETURN_TOKEN(T_AND_EQUAL);
}
<ST_IN_SCRIPTING>"|=" {
- return T_OR_EQUAL;
+ RETURN_TOKEN(T_OR_EQUAL);
}
<ST_IN_SCRIPTING>"^=" {
- return T_XOR_EQUAL;
+ RETURN_TOKEN(T_XOR_EQUAL);
}
<ST_IN_SCRIPTING>"||" {
- return T_BOOLEAN_OR;
+ RETURN_TOKEN(T_BOOLEAN_OR);
}
<ST_IN_SCRIPTING>"&&" {
- return T_BOOLEAN_AND;
+ RETURN_TOKEN(T_BOOLEAN_AND);
}
<ST_IN_SCRIPTING>"OR" {
- return T_LOGICAL_OR;
+ RETURN_TOKEN(T_LOGICAL_OR);
}
<ST_IN_SCRIPTING>"AND" {
- return T_LOGICAL_AND;
+ RETURN_TOKEN(T_LOGICAL_AND);
}
<ST_IN_SCRIPTING>"XOR" {
- return T_LOGICAL_XOR;
+ RETURN_TOKEN(T_LOGICAL_XOR);
}
<ST_IN_SCRIPTING>"<<" {
- return T_SL;
+ RETURN_TOKEN(T_SL);
}
<ST_IN_SCRIPTING>">>" {
- return T_SR;
+ RETURN_TOKEN(T_SR);
}
<ST_IN_SCRIPTING>{TOKENS} {
- return yytext[0];
+ RETURN_TOKEN(yytext[0]);
}
<ST_IN_SCRIPTING>"{" {
yy_push_state(ST_IN_SCRIPTING);
- return '{';
+ RETURN_TOKEN('{');
}
<ST_DOUBLE_QUOTES,ST_BACKQUOTE,ST_HEREDOC>"${" {
yy_push_state(ST_LOOKING_FOR_VARNAME);
- return T_DOLLAR_OPEN_CURLY_BRACES;
+ RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES);
}
if (!zend_stack_is_empty(&SCNG(state_stack))) {
yy_pop_state();
}
- return '}';
+ RETURN_TOKEN('}');
}
zend_copy_value(zendlval, yytext, yyleng);
yy_pop_state();
yy_push_state(ST_IN_SCRIPTING);
- return T_STRING_VARNAME;
+ RETURN_TOKEN(T_STRING_VARNAME);
}
ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
ZEND_ASSERT(!errno && end == yytext + yyleng);
}
- return T_LNUMBER;
+ RETURN_TOKEN(T_LNUMBER);
} else {
ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
/* errno isn't checked since we allow HUGE_VAL/INF overflow */
ZEND_ASSERT(end == yytext + yyleng);
- return T_DNUMBER;
+ RETURN_TOKEN(T_DNUMBER);
}
}
*/
if (end != yytext + yyleng) {
zend_throw_exception(zend_get_parse_exception(), "Invalid numeric literal", E_PARSE);
- return T_ERROR;
+ RETURN_TOKEN(T_ERROR);
}
} else {
errno = 0;
if (end != yytext + yyleng) {
zend_throw_exception(zend_get_parse_exception(),
"Invalid numeric literal", E_PARSE);
- return T_ERROR;
+ RETURN_TOKEN(T_ERROR);
}
ZEND_ASSERT(!errno);
- return T_DNUMBER;
+ RETURN_TOKEN(T_DNUMBER);
}
/* Also not an assert for the same reason */
if (end != yytext + yyleng) {
zend_throw_exception(zend_get_parse_exception(), "Invalid numeric literal", E_PARSE);
- return T_ERROR;
+ RETURN_TOKEN(T_ERROR);
}
}
ZEND_ASSERT(!errno);
- return T_LNUMBER;
+ RETURN_TOKEN(T_LNUMBER);
}
<ST_IN_SCRIPTING>{HNUM} {
ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16));
ZEND_ASSERT(!errno && end == hex + len);
}
- return T_LNUMBER;
+ RETURN_TOKEN(T_LNUMBER);
} else {
ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end));
/* errno isn't checked since we allow HUGE_VAL/INF overflow */
ZEND_ASSERT(end == hex + len);
- return T_DNUMBER;
+ RETURN_TOKEN(T_DNUMBER);
}
}
string:
ZVAL_STRINGL(zendlval, yytext, yyleng);
}
- return T_NUM_STRING;
+ RETURN_TOKEN(T_NUM_STRING);
}
<ST_VAR_OFFSET>{LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */
ZVAL_STRINGL(zendlval, yytext, yyleng);
- return T_NUM_STRING;
+ RETURN_TOKEN(T_NUM_STRING);
}
<ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
ZVAL_DOUBLE(zendlval, zend_strtod(yytext, &end));
/* errno isn't checked since we allow HUGE_VAL/INF overflow */
ZEND_ASSERT(end == yytext + yyleng);
- return T_DNUMBER;
+ RETURN_TOKEN(T_DNUMBER);
}
<ST_IN_SCRIPTING>"__CLASS__" {
- return T_CLASS_C;
+ RETURN_TOKEN(T_CLASS_C);
}
<ST_IN_SCRIPTING>"__TRAIT__" {
- return T_TRAIT_C;
+ RETURN_TOKEN(T_TRAIT_C);
}
<ST_IN_SCRIPTING>"__FUNCTION__" {
- return T_FUNC_C;
+ RETURN_TOKEN(T_FUNC_C);
}
<ST_IN_SCRIPTING>"__METHOD__" {
- return T_METHOD_C;
+ RETURN_TOKEN(T_METHOD_C);
}
<ST_IN_SCRIPTING>"__LINE__" {
- return T_LINE;
+ RETURN_TOKEN(T_LINE);
}
<ST_IN_SCRIPTING>"__FILE__" {
- return T_FILE;
+ RETURN_TOKEN(T_FILE);
}
<ST_IN_SCRIPTING>"__DIR__" {
- return T_DIR;
+ RETURN_TOKEN(T_DIR);
}
<ST_IN_SCRIPTING>"__NAMESPACE__" {
- return T_NS_C;
+ RETURN_TOKEN(T_NS_C);
}
<INITIAL>"<?=" {
BEGIN(ST_IN_SCRIPTING);
- return T_OPEN_TAG_WITH_ECHO;
+ RETURN_TOKEN(T_OPEN_TAG_WITH_ECHO);
}
<INITIAL>"<?php"([ \t]|{NEWLINE}) {
HANDLE_NEWLINE(yytext[yyleng-1]);
BEGIN(ST_IN_SCRIPTING);
- return T_OPEN_TAG;
+ RETURN_TOKEN(T_OPEN_TAG);
}
<INITIAL>"<?" {
if (CG(short_tags)) {
BEGIN(ST_IN_SCRIPTING);
- return T_OPEN_TAG;
+ RETURN_TOKEN(T_OPEN_TAG);
} else {
goto inline_char_handler;
}
<INITIAL>{ANY_CHAR} {
if (YYCURSOR > YYLIMIT) {
- return 0;
+ RETURN_TOKEN(END);
}
inline_char_handler:
ZVAL_STRINGL(zendlval, yytext, yyleng);
}
HANDLE_NEWLINES(yytext, yyleng);
- return T_INLINE_HTML;
+ RETURN_TOKEN(T_INLINE_HTML);
}
yyless(yyleng - 3);
yy_push_state(ST_LOOKING_FOR_PROPERTY);
zend_copy_value(zendlval, (yytext+1), (yyleng-1));
- return T_VARIABLE;
+ RETURN_TOKEN(T_VARIABLE);
}
/* A [ always designates a variable offset, regardless of what follows
yyless(yyleng - 1);
yy_push_state(ST_VAR_OFFSET);
zend_copy_value(zendlval, (yytext+1), (yyleng-1));
- return T_VARIABLE;
+ RETURN_TOKEN(T_VARIABLE);
}
<ST_IN_SCRIPTING,ST_DOUBLE_QUOTES,ST_HEREDOC,ST_BACKQUOTE,ST_VAR_OFFSET>"$"{LABEL} {
zend_copy_value(zendlval, (yytext+1), (yyleng-1));
- return T_VARIABLE;
+ RETURN_TOKEN(T_VARIABLE);
}
<ST_VAR_OFFSET>"]" {
yy_pop_state();
- return ']';
+ RETURN_TOKEN(']');
}
<ST_VAR_OFFSET>{TOKENS}|[{}"`] {
/* Only '[' can be valid, but returning other tokens will allow a more explicit parse error */
- return yytext[0];
+ RETURN_TOKEN(yytext[0]);
}
<ST_VAR_OFFSET>[ \n\r\t\\'#] {
yyless(0);
yy_pop_state();
ZVAL_NULL(zendlval);
- return T_ENCAPSED_AND_WHITESPACE;
+ RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
}
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{LABEL} {
zend_copy_value(zendlval, yytext, yyleng);
- return T_STRING;
+ RETURN_TOKEN(T_STRING);
}
-<ST_IN_SCRIPTING,ST_LOOKING_FOR_SEMI_RESERVED_NAME>"#"|"//" {
+<ST_IN_SCRIPTING>"#"|"//" {
while (YYCURSOR < YYLIMIT) {
switch (*YYCURSOR++) {
case '\r':
yyleng = YYCURSOR - SCNG(yy_text);
- return T_COMMENT;
+ RETURN_TOKEN(T_COMMENT);
}
-<ST_IN_SCRIPTING,ST_LOOKING_FOR_SEMI_RESERVED_NAME>"/*"|"/**"{WHITESPACE} {
+<ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
int doc_com;
if (yyleng > 2) {
if (doc_com) {
CG(doc_comment) = zend_string_init(yytext, yyleng, 0);
- return T_DOC_COMMENT;
+ RETURN_TOKEN(T_DOC_COMMENT);
}
- return T_COMMENT;
-}
-
-<ST_LOOKING_FOR_SEMI_RESERVED_NAME>{LABEL} {
- zend_copy_value(zendlval, yytext, yyleng);
- yy_pop_state();
- return T_STRING;
-}
-
-<ST_LOOKING_FOR_SEMI_RESERVED_NAME>{ANY_CHAR} {
- yyless(0);
- yy_pop_state();
- goto restart;
+ RETURN_TOKEN(T_COMMENT);
}
<ST_IN_SCRIPTING>"?>"{NEWLINE}? {
BEGIN(INITIAL);
- return T_CLOSE_TAG; /* implicit ';' at php-end tag */
+ RETURN_TOKEN(T_CLOSE_TAG); /* implicit ';' at php-end tag */
}
* for ' (unrecognized by parser), instead of old flex fallback to "Unexpected character..."
* rule, which continued in ST_IN_SCRIPTING state after the quote */
ZVAL_NULL(zendlval);
- return T_ENCAPSED_AND_WHITESPACE;
+ RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
}
}
SCNG(output_filter)((unsigned char **)&str, &sz, (unsigned char *)s, (size_t)Z_STRLEN_P(zendlval));
ZVAL_STRINGL(zendlval, str, sz);
}
- return T_CONSTANT_ENCAPSED_STRING;
+ RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
}
case '"':
yyleng = YYCURSOR - SCNG(yy_text);
if (zend_scan_escape_string(zendlval, yytext+bprefix+1, yyleng-bprefix-2, '"') == FAILURE) {
- return T_ERROR;
+ RETURN_TOKEN(T_ERROR);
}
- return T_CONSTANT_ENCAPSED_STRING;
+ RETURN_TOKEN(T_CONSTANT_ENCAPSED_STRING);
case '$':
if (IS_LABEL_START(*YYCURSOR) || *YYCURSOR == '{') {
break;
YYCURSOR = SCNG(yy_text) + yyleng;
BEGIN(ST_DOUBLE_QUOTES);
- return '"';
+ RETURN_TOKEN('"');
}
zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) heredoc_label);
- return T_START_HEREDOC;
+ RETURN_TOKEN(T_START_HEREDOC);
}
<ST_IN_SCRIPTING>[`] {
BEGIN(ST_BACKQUOTE);
- return '`';
+ RETURN_TOKEN('`');
}
efree(heredoc_label);
BEGIN(ST_IN_SCRIPTING);
- return T_END_HEREDOC;
+ RETURN_TOKEN(T_END_HEREDOC);
}
Z_LVAL_P(zendlval) = (zend_long) '{';
yy_push_state(ST_IN_SCRIPTING);
yyless(1);
- return T_CURLY_OPEN;
+ RETURN_TOKEN(T_CURLY_OPEN);
}
<ST_DOUBLE_QUOTES>["] {
BEGIN(ST_IN_SCRIPTING);
- return '"';
+ RETURN_TOKEN('"');
}
<ST_BACKQUOTE>[`] {
BEGIN(ST_IN_SCRIPTING);
- return '`';
+ RETURN_TOKEN('`');
}
}
if (YYCURSOR > YYLIMIT) {
- return 0;
+ RETURN_TOKEN(END);
}
if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
YYCURSOR++;
yyleng = YYCURSOR - SCNG(yy_text);
if (zend_scan_escape_string(zendlval, yytext, yyleng, '"') == FAILURE) {
- return T_ERROR;
+ RETURN_TOKEN(T_ERROR);
}
- return T_ENCAPSED_AND_WHITESPACE;
+ RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
}
<ST_BACKQUOTE>{ANY_CHAR} {
if (YYCURSOR > YYLIMIT) {
- return 0;
+ RETURN_TOKEN(END);
}
if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) {
YYCURSOR++;
yyleng = YYCURSOR - SCNG(yy_text);
if (zend_scan_escape_string(zendlval, yytext, yyleng, '`') == FAILURE) {
- return T_ERROR;
+ RETURN_TOKEN(T_ERROR);
}
- return T_ENCAPSED_AND_WHITESPACE;
+ RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
}
zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
if (YYCURSOR > YYLIMIT) {
- return 0;
+ RETURN_TOKEN(END);
}
YYCURSOR--;
yyleng = YYCURSOR - SCNG(yy_text);
if (zend_scan_escape_string(zendlval, yytext, yyleng - newline, 0) == FAILURE) {
- return T_ERROR;
+ RETURN_TOKEN(T_ERROR);
}
- return T_ENCAPSED_AND_WHITESPACE;
+ RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
}
zend_heredoc_label *heredoc_label = zend_ptr_stack_top(&SCNG(heredoc_label_stack));
if (YYCURSOR > YYLIMIT) {
- return 0;
+ RETURN_TOKEN(END);
}
YYCURSOR--;
zend_copy_value(zendlval, yytext, yyleng - newline);
HANDLE_NEWLINES(yytext, yyleng - newline);
- return T_ENCAPSED_AND_WHITESPACE;
+ RETURN_TOKEN(T_ENCAPSED_AND_WHITESPACE);
}
<ST_IN_SCRIPTING,ST_VAR_OFFSET>{ANY_CHAR} {
if (YYCURSOR > YYLIMIT) {
- return 0;
+ RETURN_TOKEN(END);
}
zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE);
#define zendcursor LANG_SCNG(yy_cursor)
#define zendlimit LANG_SCNG(yy_limit)
+#define TOKEN_PARSE 1
+
+void tokenizer_token_get_all_register_constants(INIT_FUNC_ARGS) {
+ REGISTER_LONG_CONSTANT("TOKEN_PARSE", TOKEN_PARSE, CONST_CS|CONST_PERSISTENT);
+}
+
/* {{{ arginfo */
ZEND_BEGIN_ARG_INFO_EX(arginfo_token_get_all, 0, 0, 1)
ZEND_ARG_INFO(0, source)
PHP_MINIT_FUNCTION(tokenizer)
{
tokenizer_register_constants(INIT_FUNC_ARGS_PASSTHRU);
+ tokenizer_token_get_all_register_constants(INIT_FUNC_ARGS_PASSTHRU);
return SUCCESS;
}
/* }}} */
}
/* }}} */
-static void tokenize(zval *return_value)
+static zend_bool tokenize(zval *return_value, zend_string *source)
{
+ zval source_zval;
+ zend_lex_state original_lex_state;
zval token;
zval keyword;
int token_type;
int token_line = 1;
int need_tokens = -1; /* for __halt_compiler lexing. -1 = disabled */
+ ZVAL_STR_COPY(&source_zval, source);
+ zend_save_lexical_state(&original_lex_state);
+
+ if (zend_prepare_string_for_scanning(&source_zval, "") == FAILURE) {
+ zend_restore_lexical_state(&original_lex_state);
+ return 0;
+ }
+
+ LANG_SCNG(yy_state) = yycINITIAL;
array_init(return_value);
ZVAL_NULL(&token);
while ((token_type = lex_scan(&token))) {
+
+ if(token_type == T_ERROR) break;
+
destroy = 1;
switch (token_type) {
case T_CLOSE_TAG:
case T_DOC_COMMENT:
destroy = 0;
break;
- case T_ERROR:
- return;
}
if (token_type >= 256) {
token_line = CG(zend_lineno);
}
+
+ zval_dtor(&source_zval);
+ zend_restore_lexical_state(&original_lex_state);
+
+ return 1;
}
-/* {{{ proto array token_get_all(string source)
- */
-PHP_FUNCTION(token_get_all)
+zval token_stream;
+
+void on_event(zend_php_scanner_event event, int token, int line)
{
- zend_string *source;
- zval source_zval;
- zend_lex_state original_lex_state;
+ zval keyword;
+ HashTable *tokens_ht;
+ zval *token_zv;
- if (zend_parse_parameters(ZEND_NUM_ARGS(), "S", &source) == FAILURE) {
- return;
+ switch(event) {
+ case ON_TOKEN:
+ if (token == T_ERROR || token == END) break;
+ if (token >= 256) {
+ array_init(&keyword);
+ add_next_index_long(&keyword, token);
+ add_next_index_stringl(&keyword, (char *)LANG_SCNG(yy_text), LANG_SCNG(yy_leng));
+ add_next_index_long(&keyword, line);
+ add_next_index_zval(&token_stream, &keyword);
+ } else {
+ add_next_index_stringl(&token_stream, (char *)LANG_SCNG(yy_text), LANG_SCNG(yy_leng));
+ }
+ break;
+ case ON_FEEDBACK:
+ tokens_ht = Z_ARRVAL(token_stream);
+ token_zv = zend_hash_index_find(tokens_ht, zend_hash_num_elements(tokens_ht) - 1);
+ if (token_zv && Z_TYPE_P(token_zv) == IS_ARRAY) {
+ ZVAL_LONG(zend_hash_index_find(Z_ARRVAL_P(token_zv), 0), token);
+ }
+ break;
+ case ON_STOP:
+ if (LANG_SCNG(yy_cursor) != LANG_SCNG(yy_limit)) {
+ array_init(&keyword);
+ add_next_index_long(&keyword, T_INLINE_HTML);
+ add_next_index_stringl(&keyword,
+ (char *)LANG_SCNG(yy_cursor), LANG_SCNG(yy_limit) - LANG_SCNG(yy_cursor));
+ add_next_index_long(&keyword, CG(zend_lineno));
+ add_next_index_zval(&token_stream, &keyword);
+ }
+ break;
}
+}
+
+static zend_bool tokenize_parse(zval *return_value, zend_string *source)
+{
+ zval source_zval;
+ zend_lex_state original_lex_state;
+ zend_bool original_in_compilation;
+ zend_bool success;
ZVAL_STR_COPY(&source_zval, source);
+
+ original_in_compilation = CG(in_compilation);
+ CG(in_compilation) = 1;
zend_save_lexical_state(&original_lex_state);
- if (zend_prepare_string_for_scanning(&source_zval, "") == FAILURE) {
- zend_restore_lexical_state(&original_lex_state);
- RETURN_FALSE;
- }
+ if ((success = (zend_prepare_string_for_scanning(&source_zval, "") == SUCCESS))) {
+ CG(ast) = NULL;
+ CG(ast_arena) = zend_arena_create(1024 * 32);
+ LANG_SCNG(yy_state) = yycINITIAL;
+ LANG_SCNG(on_event) = on_event;
- LANG_SCNG(yy_state) = yycINITIAL;
+ array_init(&token_stream);
+ if((success = (zendparse() == SUCCESS))) {
+ ZVAL_ZVAL(return_value, &token_stream, 1, 0);
+ }
+ zval_dtor(&token_stream);
- tokenize(return_value);
+ zend_ast_destroy(CG(ast));
+ zend_arena_destroy(CG(ast_arena));
+ }
+ /* restore compiler and scanner global states */
zend_restore_lexical_state(&original_lex_state);
+ CG(in_compilation) = original_in_compilation;
+
zval_dtor(&source_zval);
+
+ return success;
+}
+
+/* }}} */
+
+/* {{{ proto array token_get_all(string source)
+ */
+PHP_FUNCTION(token_get_all)
+{
+ zend_string *source;
+ zend_long flags = 0;
+ zend_bool success;
+
+ if (zend_parse_parameters(ZEND_NUM_ARGS(), "S|l", &source, &flags) == FAILURE) {
+ return;
+ }
+
+ if (flags & TOKEN_PARSE) {
+ success = tokenize_parse(return_value, source);
+ } else {
+ success = tokenize(return_value, source);
+ }
+
+ if (!success) RETURN_FALSE;
}
/* }}} */