]> granicus.if.org Git - php/commitdiff
implement context sensitive language with lexical feedback
authorMárcio Almada <marcio3w@gmail.com>
Mon, 9 Mar 2015 05:24:50 +0000 (02:24 -0300)
committerMárcio Almada <marcio3w@gmail.com>
Thu, 30 Apr 2015 06:03:29 +0000 (03:03 -0300)
The implementation has no regression risks, has an even smaller footprint
compared to the previous attempt involving a pure lexical approach, is highly
predictable and highly configurable.

To turn a word semi-reserved you only need to edit the "semi_reserved" parser rule;
it is an inclusive list of all the words that should be matched as T_STRING in specific contexts.
Example:

```
method_modifiers function returns_ref identifier '(' parameter_list ')' ...
```

instead of:

```
method_modifiers function returns_ref T_STRING '(' parameter_list ')' ...
```

TODO: port ext tokenizer

Zend/zend_language_parser.y
Zend/zend_language_scanner.l

index 2541c9f5713eabec4929b402befecf41a578fc06..cefcd0cad928e936ae42b1610d218afee70cbf3b 100644 (file)
@@ -35,6 +35,7 @@
 #include "zend_globals.h"
 #include "zend_API.h"
 #include "zend_constants.h"
+#include "zend_language_scanner_defs.h"
 
 #define YYSIZE_T size_t
 #define yytnamerr zend_yytnamerr
@@ -48,6 +49,12 @@ static YYSIZE_T zend_yytnamerr(char*, const char*);
 #define YYFREE free
 #endif
 
+#define REWIND { /* make the scanner re-read the current token text in the semi-reserved-name state; expands to a bare block, so it is used as `{ REWIND }` with no trailing ';' */ \
+       zend_stack_push(&LANG_SCNG(state_stack), (void *) &LANG_SCNG(yy_state)); /* push the current scanner state onto state_stack */ \
+       LANG_SCNG(yy_state) = yycST_LOOKING_FOR_SEMI_RESERVED_NAME; /* switch the scanner state */ \
+       LANG_SCNG(yy_cursor) = (unsigned char*)LANG_SCNG(yy_text); /* move the scan cursor back to yy_text */ \
+       LANG_SCNG(yy_leng)   = 0; } /* reset the recorded token length */
+
 %}
 
 %pure_parser
@@ -243,7 +250,7 @@ static YYSIZE_T zend_yytnamerr(char*, const char*);
 %type <ast> absolute_trait_method_reference trait_method_reference property echo_expr
 %type <ast> new_expr anonymous_class class_name class_name_reference simple_variable
 %type <ast> internal_functions_in_yacc
-%type <ast> exit_expr scalar backticks_expr lexical_var function_call member_name
+%type <ast> exit_expr scalar backticks_expr lexical_var function_call member_name property_name
 %type <ast> variable_class_name dereferencable_scalar class_name_scalar constant dereferencable
 %type <ast> callable_expr callable_variable static_member new_variable
 %type <ast> assignment_list_element array_pair encaps_var encaps_var_offset isset_variables
@@ -252,10 +259,11 @@ static YYSIZE_T zend_yytnamerr(char*, const char*);
 %type <ast> echo_expr_list unset_variables catch_list parameter_list class_statement_list
 %type <ast> implements_list case_list if_stmt_without_else
 %type <ast> non_empty_parameter_list argument_list non_empty_argument_list property_list
-%type <ast> class_const_list name_list trait_adaptations method_body non_empty_for_exprs
+%type <ast> class_const_list class_const_decl name_list trait_adaptations method_body non_empty_for_exprs
 %type <ast> ctor_arguments alt_if_stmt_without_else trait_adaptation_list lexical_vars
 %type <ast> lexical_var_list encaps_list array_pair_list non_empty_array_pair_list
 %type <ast> assignment_list isset_variable type return_type
+%type <ast> identifier
 
 %type <num> returns_ref function is_reference is_variadic variable_modifiers
 %type <num> method_modifiers trait_modifiers non_empty_member_modifiers member_modifier
@@ -269,6 +277,22 @@ start:
        top_statement_list      { CG(ast) = $1; }
 ;
 
+semi_reserved: /* keyword tokens that the identifier rule rewinds and rematches as T_STRING */
+         T_INCLUDE | T_INCLUDE_ONCE | T_EVAL | T_REQUIRE | T_REQUIRE_ONCE | T_LOGICAL_OR | T_LOGICAL_XOR | T_LOGICAL_AND
+       | T_INSTANCEOF | T_NEW | T_CLONE | T_EXIT | T_IF | T_ELSEIF | T_ELSE | T_ENDIF | T_ECHO | T_DO | T_WHILE | T_ENDWHILE
+       | T_FOR | T_ENDFOR | T_FOREACH | T_ENDFOREACH | T_DECLARE | T_ENDDECLARE | T_AS | T_TRY | T_CATCH | T_FINALLY
+       | T_THROW | T_USE | T_INSTEADOF | T_GLOBAL | T_VAR | T_UNSET | T_ISSET | T_EMPTY | T_CONTINUE | T_GOTO
+       | T_FUNCTION | T_CONST | T_RETURN | T_PRINT | T_YIELD | T_LIST | T_SWITCH | T_ENDSWITCH | T_CASE | T_DEFAULT | T_BREAK
+       | T_ARRAY | T_CALLABLE | T_EXTENDS | T_IMPLEMENTS | T_NAMESPACE | T_TRAIT | T_INTERFACE
+       // | T_STATIC | T_ABSTRACT | T_FINAL | T_PRIVATE | T_PROTECTED | T_PUBLIC
+       // | T_CLASS
+;
+
+identifier: /* a plain T_STRING, or a semi_reserved keyword that is rewound and rematched as T_STRING */
+               T_STRING { $$ = $1; }
+       |       /* if */ semi_reserved { REWIND } /* and rematch as */ T_STRING { $$ = $3; } /* $3 because the mid-rule action { REWIND } occupies position $2 */
+;
+
 top_statement_list:
                top_statement_list top_statement { $$ = zend_ast_list_add($1, $2); }
        |       /* empty */ { $$ = zend_ast_create_list(0, ZEND_AST_STMT_LIST); }
@@ -673,7 +697,7 @@ class_statement:
                        { $$ = $2; RESET_DOC_COMMENT(); }
        |       T_USE name_list trait_adaptations
                        { $$ = zend_ast_create(ZEND_AST_USE_TRAIT, $2, $3); }
-       |       method_modifiers function returns_ref T_STRING '(' parameter_list ')'
+       |       method_modifiers function returns_ref identifier '(' parameter_list ')'
                return_type backup_doc_comment method_body
                        { $$ = zend_ast_create_decl(ZEND_AST_METHOD, $3 | $1, $2, $9,
                                  zend_ast_get_str($4), $6, NULL, $10, $8); }
@@ -708,20 +732,20 @@ trait_precedence:
 ;
 
 trait_alias:
-               trait_method_reference T_AS trait_modifiers T_STRING
+               trait_method_reference T_AS trait_modifiers identifier
                        { $$ = zend_ast_create_ex(ZEND_AST_TRAIT_ALIAS, $3, $1, $4); }
        |       trait_method_reference T_AS member_modifier
                        { $$ = zend_ast_create_ex(ZEND_AST_TRAIT_ALIAS, $3, $1, NULL); }
 ;
 
 trait_method_reference:
-               T_STRING
+               identifier
                        { $$ = zend_ast_create(ZEND_AST_METHOD_REFERENCE, NULL, $1); }
        |       absolute_trait_method_reference { $$ = $1; }
 ;
 
 absolute_trait_method_reference:
-       name T_PAAMAYIM_NEKUDOTAYIM T_STRING
+       name T_PAAMAYIM_NEKUDOTAYIM identifier
                { $$ = zend_ast_create(ZEND_AST_METHOD_REFERENCE, $1, $3); }
 ;
 
@@ -773,8 +797,12 @@ property:
 ;
 
 class_const_list:
-               class_const_list ',' const_decl { $$ = zend_ast_list_add($1, $3); }
-       |       const_decl { $$ = zend_ast_create_list(1, ZEND_AST_CLASS_CONST_DECL, $1); }
+               class_const_list ',' class_const_decl { $$ = zend_ast_list_add($1, $3); }
+       |       class_const_decl { $$ = zend_ast_create_list(1, ZEND_AST_CLASS_CONST_DECL, $1); }
+;
+
+class_const_decl:
+       identifier '=' expr { $$ = zend_ast_create(ZEND_AST_CONST_ELEM, $1, $3); }
 ;
 
 const_decl:
@@ -1034,9 +1062,9 @@ scalar:
 
 constant:
                name { $$ = zend_ast_create(ZEND_AST_CONST, $1); }
-       |       class_name T_PAAMAYIM_NEKUDOTAYIM T_STRING
+       |       class_name T_PAAMAYIM_NEKUDOTAYIM identifier
                        { $$ = zend_ast_create(ZEND_AST_CLASS_CONST, $1, $3); }
-       |       variable_class_name T_PAAMAYIM_NEKUDOTAYIM T_STRING
+       |       variable_class_name T_PAAMAYIM_NEKUDOTAYIM identifier
                        { $$ = zend_ast_create(ZEND_AST_CLASS_CONST, $1, $3); }
 ;
 
@@ -1080,7 +1108,7 @@ callable_variable:
                        { $$ = zend_ast_create(ZEND_AST_DIM, $1, $3); }
        |       dereferencable '{' expr '}'
                        { $$ = zend_ast_create(ZEND_AST_DIM, $1, $3); }
-       |       dereferencable T_OBJECT_OPERATOR member_name argument_list
+       |       dereferencable T_OBJECT_OPERATOR property_name argument_list
                        { $$ = zend_ast_create(ZEND_AST_METHOD_CALL, $1, $3, $4); }
        |       function_call { $$ = $1; }
 ;
@@ -1090,7 +1118,7 @@ variable:
                        { $$ = $1; }
        |       static_member
                        { $$ = $1; }
-       |       dereferencable T_OBJECT_OPERATOR member_name
+       |       dereferencable T_OBJECT_OPERATOR property_name
                        { $$ = zend_ast_create(ZEND_AST_PROP, $1, $3); }
 ;
 
@@ -1114,7 +1142,7 @@ new_variable:
                        { $$ = zend_ast_create(ZEND_AST_DIM, $1, $3); }
        |       new_variable '{' expr '}'
                        { $$ = zend_ast_create(ZEND_AST_DIM, $1, $3); }
-       |       new_variable T_OBJECT_OPERATOR member_name
+       |       new_variable T_OBJECT_OPERATOR property_name
                        { $$ = zend_ast_create(ZEND_AST_PROP, $1, $3); }
        |       class_name T_PAAMAYIM_NEKUDOTAYIM simple_variable
                        { $$ = zend_ast_create(ZEND_AST_STATIC_PROP, $1, $3); }
@@ -1123,7 +1151,13 @@ new_variable:
 ;
 
 member_name:
-               T_STRING                { $$ = $1; }
+               identifier { $$ = $1; }
+       |       '{' expr '}'    { $$ = $2; }
+       |       simple_variable { $$ = zend_ast_create(ZEND_AST_VAR, $1); }
+;
+
+property_name:
+               T_STRING { $$ = $1; }
        |       '{' expr '}'    { $$ = $2; }
        |       simple_variable { $$ = zend_ast_create(ZEND_AST_VAR, $1); }
 ;
index fdba4b9f07685b666fd523494e697f5e67505510..2481af605b7df70f83372df76e11751ed42a9f6e 100644 (file)
@@ -1271,7 +1271,7 @@ NEWLINE ("\r"|"\n"|"\r\n")
        return T_OBJECT_OPERATOR;
 }
 
-<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY>{WHITESPACE}+ {
+<ST_IN_SCRIPTING,ST_LOOKING_FOR_PROPERTY,ST_LOOKING_FOR_SEMI_RESERVED_NAME>{WHITESPACE}+ {
        HANDLE_NEWLINES(yytext, yyleng);
        return T_WHITESPACE;
 }
@@ -1875,7 +1875,7 @@ inline_char_handler:
 }
 
 
-<ST_IN_SCRIPTING>"#"|"//" {
+<ST_IN_SCRIPTING,ST_LOOKING_FOR_SEMI_RESERVED_NAME>"#"|"//" {
        while (YYCURSOR < YYLIMIT) {
                switch (*YYCURSOR++) {
                        case '\r':
@@ -1904,7 +1904,7 @@ inline_char_handler:
        return T_COMMENT;
 }
 
-<ST_IN_SCRIPTING>"/*"|"/**"{WHITESPACE} {
+<ST_IN_SCRIPTING,ST_LOOKING_FOR_SEMI_RESERVED_NAME>"/*"|"/**"{WHITESPACE} {
        int doc_com;
 
        if (yyleng > 2) {
@@ -1937,6 +1937,18 @@ inline_char_handler:
        return T_COMMENT;
 }
 
+<ST_LOOKING_FOR_SEMI_RESERVED_NAME>{LABEL} {
+    zend_copy_value(zendlval, yytext, yyleng); /* presumably copies the matched label text into the token value - confirm against zend_copy_value */
+    yy_pop_state(); /* leave ST_LOOKING_FOR_SEMI_RESERVED_NAME once a label has been matched */
+    return T_STRING; /* any label matched in this state is reported as T_STRING */
+}
+
+<ST_LOOKING_FOR_SEMI_RESERVED_NAME>{ANY_CHAR} {
+    yyless(0); /* keep zero characters of the match, i.e. give the character back to the input */
+    yy_pop_state(); /* leave ST_LOOKING_FOR_SEMI_RESERVED_NAME: no label was found here */
+    goto restart; /* scan again from the same position under the popped state */
+}
+
 <ST_IN_SCRIPTING>"?>"{NEWLINE}? {
        BEGIN(INITIAL);
        return T_CLOSE_TAG;  /* implicit ';' at php-end tag */