From: Alex Dowad Date: Wed, 8 Apr 2020 11:19:39 +0000 (+0200) Subject: Syntax errors caused by unclosed {, [, ( mention specific location X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=80598f12507c5cbde04163289e4d2575f05d2a0c;p=php Syntax errors caused by unclosed {, [, ( mention specific location Aside from a few very specific syntax errors for which detailed exceptions are thrown, generally PHP just emits the default error messages generated by bison on syntax error. These messages are very uninformative; they just say "Unexpected ... at line ...". This is most problematic with constructs which can span an arbitrary number of lines, such as blocks of code delimited by { }, 'if' conditions delimited by ( ), and so on. If a closing delimiter is missed, the block will run for the entire remainder of the source file (which could be thousands of lines), and then at the end, a parse error will be thrown with the dreaded words: "Unexpected end of file". Therefore, track the positions of opening and closing delimiters and ensure that they match up correctly. If any mismatch or missing delimiter is detected, immediately throw a parse error which points the user to the offending line. This is best done in the *lexer* and not in the parser. Thanks to Nikita Popov and George Peter Banyard for suggesting improvements. Fixes bug #79368. Closes GH-5364. --- diff --git a/NEWS b/NEWS index 2d1cc8a15c..430edada41 100644 --- a/NEWS +++ b/NEWS @@ -18,6 +18,8 @@ PHP NEWS (Nikita) . Fixed bug #79462 (method_exists and property_exists incoherent behavior). (cmb) + . Fixed bug #79368 ("Unexpected end of file" is not an acceptable error + message). (Alex Dowad) - CURL: . Bumped required libcurl version to 7.29.0. (cmb) diff --git a/Zend/tests/bug60099.phpt b/Zend/tests/bug60099.phpt index a8745fa522..fbb5e85fb7 100644 --- a/Zend/tests/bug60099.phpt +++ b/Zend/tests/bug60099.phpt @@ -7,4 +7,4 @@ namespace foo { ?> --EXPECTF-- -Parse error: syntax error, unexpected end of file in %s on line %d +Parse error: Unclosed '{' on line 2 in %s on line %d diff --git a/Zend/tests/eval_parse_error_with_doc_comment.phpt b/Zend/tests/eval_parse_error_with_doc_comment.phpt index 62561aaa79..09bc39b83d 100644 --- a/Zend/tests/eval_parse_error_with_doc_comment.phpt +++ b/Zend/tests/eval_parse_error_with_doc_comment.phpt @@ -12,4 +12,4 @@ EOC ?> --EXPECTF-- -Parse error: syntax error, unexpected end of file in %s(%d) : eval()'d code on line %d +Parse error: Unclosed '{' in %s(%d) : eval()'d code on line %d diff --git a/Zend/tests/require_parse_exception.phpt b/Zend/tests/require_parse_exception.phpt index 859589231e..d035e29624 100644 --- a/Zend/tests/require_parse_exception.phpt +++ b/Zend/tests/require_parse_exception.phpt @@ -43,8 +43,8 @@ var_dump("\u{ffffff}");'); ?> --EXPECT-- Deprecated: Directive 'allow_url_include' is deprecated in Unknown on line 0 -syntax error, unexpected end of file on line 2 -syntax error, unexpected end of file on line 3 +Unclosed '{' on line 2 +Unclosed '{' on line 3 syntax error, unexpected end of file, expecting '(' on line 2 Invalid numeric literal on line 2 Invalid UTF-8 codepoint escape sequence on line 2 diff --git a/Zend/zend_globals.h b/Zend/zend_globals.h index 2dc769a5ef..b7139fdfd6 100644 --- a/Zend/zend_globals.h +++ b/Zend/zend_globals.h @@ -285,6 +285,7 @@ struct _zend_php_scanner_globals { int yy_state; zend_stack state_stack; zend_ptr_stack heredoc_label_stack; + zend_stack nest_location_stack; /* for syntax error reporting */ zend_bool heredoc_scan_ahead; int heredoc_indentation; zend_bool heredoc_indentation_uses_spaces; diff --git a/Zend/zend_language_scanner.h b/Zend/zend_language_scanner.h index 4d51a064fc..35eccaf7e6 100644 --- a/Zend/zend_language_scanner.h +++ b/Zend/zend_language_scanner.h @@ -30,6 +30,7 @@ typedef struct _zend_lex_state { int yy_state; zend_stack state_stack; zend_ptr_stack heredoc_label_stack; + zend_stack nest_location_stack; /* for syntax error reporting */ zend_file_handle *in; uint32_t lineno; @@ -63,6 +64,12 @@ typedef struct _zend_heredoc_label { zend_bool indentation_uses_spaces; } zend_heredoc_label; +/* Track locations of unclosed {, [, (, etc. for better syntax error reporting */ +typedef struct _zend_nest_location { + char text; + int lineno; +} zend_nest_location; + BEGIN_EXTERN_C() ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state); ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state); diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l index 984d73474b..8a3e88edfc 100644 --- a/Zend/zend_language_scanner.l +++ b/Zend/zend_language_scanner.l @@ -192,6 +192,7 @@ void startup_scanner(void) CG(doc_comment) = NULL; CG(extra_fn_flags) = 0; zend_stack_init(&SCNG(state_stack), sizeof(int)); + zend_stack_init(&SCNG(nest_location_stack), sizeof(zend_nest_location)); zend_ptr_stack_init(&SCNG(heredoc_label_stack)); SCNG(heredoc_scan_ahead) = 0; } @@ -205,6 +206,7 @@ void shutdown_scanner(void) CG(parse_error) = 0; RESET_DOC_COMMENT(); zend_stack_destroy(&SCNG(state_stack)); + zend_stack_destroy(&SCNG(nest_location_stack)); zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1); zend_ptr_stack_destroy(&SCNG(heredoc_label_stack)); SCNG(heredoc_scan_ahead) = 0; @@ -223,6 +225,9 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state) lex_state->state_stack = SCNG(state_stack); zend_stack_init(&SCNG(state_stack), sizeof(int)); + lex_state->nest_location_stack = SCNG(nest_location_stack); + zend_stack_init(&SCNG(nest_location_stack), sizeof(zend_nest_location)); + lex_state->heredoc_label_stack = SCNG(heredoc_label_stack); zend_ptr_stack_init(&SCNG(heredoc_label_stack)); @@ -258,6 +263,9 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state) zend_stack_destroy(&SCNG(state_stack)); SCNG(state_stack) = lex_state->state_stack; + zend_stack_destroy(&SCNG(nest_location_stack)); + SCNG(nest_location_stack) = lex_state->nest_location_stack; + zend_ptr_stack_clean(&SCNG(heredoc_label_stack), (void (*)(void *)) &heredoc_label_dtor, 1); zend_ptr_stack_destroy(&SCNG(heredoc_label_stack)); SCNG(heredoc_label_stack) = lex_state->heredoc_label_stack; @@ -1250,6 +1258,63 @@ static void copy_heredoc_label_stack(void *void_heredoc_label) zend_ptr_stack_push(&SCNG(heredoc_label_stack), (void *) new_heredoc_label); } +/* Check that { }, [ ], ( ) are nested correctly */ +static void report_bad_nesting(char opening, int opening_lineno, char closing) +{ + char buf[256]; + size_t used = 0; + + used = snprintf(buf, sizeof(buf), "Unclosed '%c'", opening); + + if (opening_lineno != CG(zend_lineno)) { + used += snprintf(buf + used, sizeof(buf) - used, " on line %d", opening_lineno); + } + + if (closing) { /* 'closing' will be 0 if at end of file */ + used += snprintf(buf + used, sizeof(buf) - used, " does not match '%c'", closing); + } + + zend_throw_exception(zend_ce_parse_error, buf, 0); +} + +static void enter_nesting(char opening) +{ + zend_nest_location nest_loc = {opening, CG(zend_lineno)}; + zend_stack_push(&SCNG(nest_location_stack), &nest_loc); +} + +static int exit_nesting(char closing) +{ + if (zend_stack_is_empty(&SCNG(nest_location_stack))) { + zend_throw_exception_ex(zend_ce_parse_error, 0, "Unmatched '%c'", closing); + return -1; + } + + zend_nest_location *nest_loc = zend_stack_top(&SCNG(nest_location_stack)); + char opening = nest_loc->text; + + if ((opening == '{' && closing != '}') || + (opening == '[' && closing != ']') || + (opening == '(' && closing != ')')) { + report_bad_nesting(opening, nest_loc->lineno, closing); + return -1; + } + + zend_stack_del_top(&SCNG(nest_location_stack)); + return 0; +} + +static int check_nesting_at_end() +{ + if (!zend_stack_is_empty(&SCNG(nest_location_stack))) { + zend_nest_location *nest_loc = zend_stack_top(&SCNG(nest_location_stack)); + report_bad_nesting(nest_loc->text, nest_loc->lineno, 0); + return -1; + } + + return 0; +} + #define PARSER_MODE() \ EXPECTED(elem != NULL) @@ -1277,6 +1342,22 @@ static void copy_heredoc_label_stack(void *void_heredoc_label) goto emit_token; \ } while (0) +#define RETURN_EXIT_NESTING_TOKEN(_token) do { \ + if (exit_nesting(_token) && PARSER_MODE()) { \ + RETURN_TOKEN(T_ERROR); \ + } else { \ + RETURN_TOKEN(_token); \ + } \ + } while(0) + +#define RETURN_END_TOKEN do { \ + if (check_nesting_at_end() && PARSER_MODE()) { \ + RETURN_TOKEN(T_ERROR); \ + } else { \ + RETURN_TOKEN(END); \ + } \ + } while (0) + int ZEND_FASTCALL lex_scan(zval *zendlval, zend_parser_stack_elem *elem) { int token; @@ -1297,7 +1378,7 @@ BNUM "0b"[01]+(_[01]+)* LABEL [a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]* WHITESPACE [ \n\r\t]+ TABS_AND_SPACES [ \t]* -TOKENS [;:,.\[\]()|^&+-/*=%!~$<>?@] +TOKENS [;:,.|^&+-/*=%!~$<>?@] ANY_CHAR [^] NEWLINE ("\r"|"\n"|"\r\n") @@ -1770,6 +1851,16 @@ NEWLINE ("\r"|"\n"|"\r\n") RETURN_TOKEN(T_SR); } +"]"|")" { + /* Check that ] and ) match up properly with a preceding [ or ( */ + RETURN_EXIT_NESTING_TOKEN(yytext[0]); +} + +"["|"(" { + enter_nesting(yytext[0]); + RETURN_TOKEN(yytext[0]); +} + {TOKENS} { RETURN_TOKEN(yytext[0]); } @@ -1777,22 +1868,23 @@ NEWLINE ("\r"|"\n"|"\r\n") "{" { yy_push_state(ST_IN_SCRIPTING); + enter_nesting('{'); RETURN_TOKEN('{'); } "${" { yy_push_state(ST_LOOKING_FOR_VARNAME); + enter_nesting('{'); RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES); } - "}" { RESET_DOC_COMMENT(); if (!zend_stack_is_empty(&SCNG(state_stack))) { yy_pop_state(); } - RETURN_TOKEN('}'); + RETURN_EXIT_NESTING_TOKEN('}'); } @@ -2088,7 +2180,7 @@ string: {ANY_CHAR} { if (YYCURSOR > YYLIMIT) { - RETURN_TOKEN(END); + RETURN_END_TOKEN; } inline_char_handler: @@ -2165,7 +2257,7 @@ inline_char_handler: RETURN_TOKEN(']'); } -{TOKENS}|[{}"`] { +{TOKENS}|[[(){}"`] { /* Only '[' or '-' can be valid, but returning other tokens will allow a more explicit parse error */ RETURN_TOKEN(yytext[0]); } @@ -2569,6 +2661,7 @@ skip_escape_conversion: "{$" { yy_push_state(ST_IN_SCRIPTING); yyless(1); + enter_nesting('{'); RETURN_TOKEN(T_CURLY_OPEN); } @@ -2593,7 +2686,7 @@ skip_escape_conversion: } if (YYCURSOR > YYLIMIT) { - RETURN_TOKEN(END); + RETURN_END_TOKEN; } if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) { YYCURSOR++; @@ -2640,7 +2733,7 @@ double_quotes_scan_done: {ANY_CHAR} { if (YYCURSOR > YYLIMIT) { - RETURN_TOKEN(END); + RETURN_END_TOKEN; } if (yytext[0] == '\\' && YYCURSOR < YYLIMIT) { YYCURSOR++; @@ -2689,7 +2782,7 @@ double_quotes_scan_done: int newline = 0, indentation = 0, spacing = 0; if (YYCURSOR > YYLIMIT) { - RETURN_TOKEN(END); + RETURN_END_TOKEN; } YYCURSOR--; @@ -2813,7 +2906,7 @@ heredoc_scan_done: int newline = 0, indentation = 0, spacing = -1; if (YYCURSOR > YYLIMIT) { - RETURN_TOKEN(END); + RETURN_END_TOKEN; } YYCURSOR--; @@ -2901,7 +2994,7 @@ nowdoc_scan_done: {ANY_CHAR} { if (YYCURSOR > YYLIMIT) { - RETURN_TOKEN(END); + RETURN_END_TOKEN; } RETURN_TOKEN(T_BAD_CHARACTER); diff --git a/ext/tokenizer/tests/token_get_all_variation17.phpt b/ext/tokenizer/tests/token_get_all_variation17.phpt index dcbabccc76..da4df47594 100644 --- a/ext/tokenizer/tests/token_get_all_variation17.phpt +++ b/ext/tokenizer/tests/token_get_all_variation17.phpt @@ -32,6 +32,7 @@ try { } catch(Exception $e) { echo "caught exception:"; } +} ?>'; $tokens = token_get_all($source); var_dump($tokens); @@ -40,7 +41,7 @@ echo "Done" ?> --EXPECTF-- *** Testing token_get_all() : with exception keywords *** -array(81) { +array(83) { [0]=> array(3) { [0]=> @@ -601,13 +602,25 @@ array(81) { int(14) } [80]=> + string(1) "}" + [81]=> array(3) { [0]=> int(%d) [1]=> - string(2) "?>" + string(1) " +" [2]=> int(15) } + [82]=> + array(3) { + [0]=> + int(%d) + [1]=> + string(2) "?>" + [2]=> + int(16) + } } Done diff --git a/ext/tokenizer/tokenizer.c b/ext/tokenizer/tokenizer.c index 222c3e96a3..364d70bf08 100644 --- a/ext/tokenizer/tokenizer.c +++ b/ext/tokenizer/tokenizer.c @@ -392,6 +392,8 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_class_en array_init(return_value); while ((token_type = lex_scan(&token, NULL))) { + ZEND_ASSERT(token_type != T_ERROR); + add_token( return_value, token_type, zendtext, zendleng, token_line, token_class, &interned_strings); @@ -408,7 +410,7 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_class_en && --need_tokens == 0 ) { /* fetch the rest into a T_INLINE_HTML */ - if (zendcursor != zendlimit) { + if (zendcursor < zendlimit) { add_token( return_value, T_INLINE_HTML, zendcursor, zendlimit - zendcursor, token_line, token_class, &interned_strings); diff --git a/tests/lang/syntax_errors.phpt b/tests/lang/syntax_errors.phpt new file mode 100644 index 0000000000..1fecb035de --- /dev/null +++ b/tests/lang/syntax_errors.phpt @@ -0,0 +1,57 @@ +--TEST-- +Detailed reporting on specific types of syntax errors +--FILE-- + 2", /* unclosed ( */ + "[1, 2,", /* unclosed [ */ + "if(1) { echo 'hello'; ", /* unclosed { */ + "(1 + 2));", /* too many ) */ + "[1, 2]]", /* too many ] */ + "if (1) { } }", /* too many } */ + "(1 + 2];", /* ] doesn't match ( */ + "[1, 2)];", /* ) doesn't match [ */ + "if(1) { echo 'a'; )}", /* ) doesn't match { */ + /* separately test cases where the faulty construct spans multiple lines, + since the error message should refer to the starting line in those cases */ + "if(1 > 2) {\n echo '1';", /* unclosed (, spans multiple lines */ + "[1,\n2,\n3,", /* unclosed [, spans multiple lines */ + "{\n echo '1';\n echo '2';", /* unclosed {, spans multiple lines */ + "(1 +\n 2 +\n 3))", /* too many ), spans multiple lines */ + "[1,\n2,\n3]];", /* too many ], spans multiple lines */ + "if (1)\n {\n }}", /* too many }, spans multiple lines */ + "(1 +\n\n 2])", /* ] doesn't match (, spans multiple lines */ + "[1,\n2,\n3)]", /* ) doesn't match [, spans multiple lines */ + "if(1) {\n echo 'a';\n)}", /* ) doesn't match {, spans multiple lines */ + ]; + +foreach ($badCode as $code) { + try { + eval($code); + } catch (ParseError $e) { + echo $e->getMessage(), "\n"; + } +} + +echo "==DONE==\n"; +?> +--EXPECT-- +Unclosed '(' +Unclosed '[' +Unclosed '{' +Unmatched ')' +Unmatched ']' +Unmatched '}' +Unclosed '(' does not match ']' +Unclosed '[' does not match ')' +Unclosed '{' does not match ')' +Unclosed '{' on line 1 +Unclosed '[' on line 1 +Unclosed '{' on line 1 +Unmatched ')' +Unmatched ']' +Unmatched '}' +Unclosed '(' on line 1 does not match ']' +Unclosed '[' on line 1 does not match ')' +Unclosed '{' on line 1 does not match ')' +==DONE==