From bcacb7545c5a23a161431a3399917ab5baf138b0 Mon Sep 17 00:00:00 2001 From: Dmitry Stogov Date: Thu, 15 Mar 2018 01:33:17 +0300 Subject: [PATCH] PHP scanner optimization --- Zend/zend_language_scanner.c | 809 ++++++++++++++++++----------------- Zend/zend_language_scanner.l | 113 +++-- 2 files changed, 498 insertions(+), 424 deletions(-) diff --git a/Zend/zend_language_scanner.c b/Zend/zend_language_scanner.c index 62bb7b576c..d0a6ce2126 100644 --- a/Zend/zend_language_scanner.c +++ b/Zend/zend_language_scanner.c @@ -876,6 +876,8 @@ ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng); \ ZVAL_STRINGL(zendlval, s, sz); \ efree(s); \ + } else if (yyleng == 1) { \ + ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR((zend_uchar)*(yytext))); \ } else { \ ZVAL_STRINGL(zendlval, yytext, yyleng); \ } @@ -885,11 +887,38 @@ static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quot register char *s, *t; char *end; + if (len <= 1) { + if (len < 1) { + ZVAL_EMPTY_STRING(zendlval); + } else { + zend_uchar c = (zend_uchar)*str; + if (c == '\n' || c == '\r') { + CG(zend_lineno)++; + } + ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR(c)); + } + goto skip_escape_conversion; + } + ZVAL_STRINGL(zendlval, str, len); /* convert escape sequences */ - s = t = Z_STRVAL_P(zendlval); + s = Z_STRVAL_P(zendlval); end = s+Z_STRLEN_P(zendlval); + while (1) { + if (UNEXPECTED(*s=='\\')) { + break; + } + if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) { + CG(zend_lineno)++; + } + s++; + if (s == end) { + goto skip_escape_conversion; + } + } + + t = s; while (s> 6) + 0xC0; *t++ = (codepoint & 0x3F) + 0x80; } else if (codepoint <= 0xFFFF) { - byte_len = 3; *t++ = (codepoint >> 12) + 0xE0; *t++ = ((codepoint >> 6) & 0x3F) + 0x80; *t++ = (codepoint & 0x3F) + 0x80; } else if (codepoint <= 0x10FFFF) { - byte_len = 4; *t++ = (codepoint >> 18) + 0xF0; *t++ = ((codepoint >> 12) & 0x3F) + 0x80; *t++ = ((codepoint >> 6) & 0x3F) + 0x80; *t++ = (codepoint & 0x3F) + 0x80; } - - Z_STRLEN_P(zendlval) -= 2; /* \u */ - Z_STRLEN_P(zendlval) -= (len - byte_len); } break; default: @@ -1051,13 +1061,10 @@ static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quot char octal_buf[4] = { 0, 0, 0, 0 }; octal_buf[0] = *s; - Z_STRLEN_P(zendlval)--; if (ZEND_IS_OCT(*(s+1))) { octal_buf[1] = *(++s); - Z_STRLEN_P(zendlval)--; if (ZEND_IS_OCT(*(s+1))) { octal_buf[2] = *(++s); - Z_STRLEN_P(zendlval)--; } } if (octal_buf[2] && @@ -1083,6 +1090,9 @@ static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quot s++; } *t = 0; + Z_STRLEN_P(zendlval) = t - Z_STRVAL_P(zendlval); + +skip_escape_conversion: if (SCNG(output_filter)) { size_t sz = 0; unsigned char *str; @@ -1131,7 +1141,7 @@ restart: SCNG(yy_text) = YYCURSOR; -#line 1135 "Zend/zend_language_scanner.c" +#line 1145 "Zend/zend_language_scanner.c" { YYCTYPE yych; unsigned int yyaccept = 0; @@ -1183,7 +1193,7 @@ yyc_INITIAL: yy4: YYDEBUG(4, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1840 "Zend/zend_language_scanner.l" +#line 1854 "Zend/zend_language_scanner.l" { if (YYCURSOR > YYLIMIT) { RETURN_TOKEN(END); @@ -1222,13 +1232,15 @@ inline_char_handler: if (readsize < yyleng) { yyless(readsize); } + } else if (yyleng == 1) { + ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR((zend_uchar)*yytext)); } else { - ZVAL_STRINGL(zendlval, yytext, yyleng); + ZVAL_STRINGL(zendlval, yytext, yyleng); } HANDLE_NEWLINES(yytext, yyleng); RETURN_TOKEN_WITH_VAL(T_INLINE_HTML); } -#line 1232 "Zend/zend_language_scanner.c" +#line 1244 "Zend/zend_language_scanner.c" yy5: YYDEBUG(5, *YYCURSOR); yych = *++YYCURSOR; @@ -1244,7 +1256,7 @@ yy5: yy7: YYDEBUG(7, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1828 "Zend/zend_language_scanner.l" +#line 1842 "Zend/zend_language_scanner.l" { if (CG(short_tags)) { BEGIN(ST_IN_SCRIPTING); @@ -1256,13 +1268,13 @@ yy7: goto inline_char_handler; } } -#line 1260 "Zend/zend_language_scanner.c" +#line 1272 "Zend/zend_language_scanner.c" yy8: YYDEBUG(8, *YYCURSOR); ++YYCURSOR; YYDEBUG(9, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1809 "Zend/zend_language_scanner.l" +#line 1823 "Zend/zend_language_scanner.l" { BEGIN(ST_IN_SCRIPTING); if (PARSER_MODE()) { @@ -1270,7 +1282,7 @@ yy8: } RETURN_TOKEN(T_OPEN_TAG_WITH_ECHO); } -#line 1274 "Zend/zend_language_scanner.c" +#line 1286 "Zend/zend_language_scanner.c" yy10: YYDEBUG(10, *YYCURSOR); yych = *++YYCURSOR; @@ -1301,7 +1313,7 @@ yy14: yy15: YYDEBUG(15, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1818 "Zend/zend_language_scanner.l" +#line 1832 "Zend/zend_language_scanner.l" { HANDLE_NEWLINE(yytext[yyleng-1]); BEGIN(ST_IN_SCRIPTING); @@ -1310,7 +1322,7 @@ yy15: } RETURN_TOKEN(T_OPEN_TAG); } -#line 1314 "Zend/zend_language_scanner.c" +#line 1326 "Zend/zend_language_scanner.c" yy16: YYDEBUG(16, *YYCURSOR); ++YYCURSOR; @@ -1367,7 +1379,7 @@ yyc_ST_BACKQUOTE: yy20: YYDEBUG(20, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 2268 "Zend/zend_language_scanner.l" +#line 2305 "Zend/zend_language_scanner.l" { if (YYCURSOR > YYLIMIT) { RETURN_TOKEN(END); @@ -1412,7 +1424,7 @@ yy20: RETURN_TOKEN(T_ERROR); } } -#line 1416 "Zend/zend_language_scanner.c" +#line 1428 "Zend/zend_language_scanner.c" yy21: YYDEBUG(21, *YYCURSOR); yych = *++YYCURSOR; @@ -1436,12 +1448,12 @@ yy22: ++YYCURSOR; YYDEBUG(23, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 2208 "Zend/zend_language_scanner.l" +#line 2245 "Zend/zend_language_scanner.l" { BEGIN(ST_IN_SCRIPTING); RETURN_TOKEN('`'); } -#line 1445 "Zend/zend_language_scanner.c" +#line 1457 "Zend/zend_language_scanner.c" yy24: YYDEBUG(24, *YYCURSOR); yych = *++YYCURSOR; @@ -1462,34 +1474,34 @@ yy25: yy27: YYDEBUG(27, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1903 "Zend/zend_language_scanner.l" +#line 1919 "Zend/zend_language_scanner.l" { RETURN_TOKEN_WITH_STR(T_VARIABLE, 1); } -#line 1470 "Zend/zend_language_scanner.c" +#line 1482 "Zend/zend_language_scanner.c" yy28: YYDEBUG(28, *YYCURSOR); ++YYCURSOR; YYDEBUG(29, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1609 "Zend/zend_language_scanner.l" +#line 1619 "Zend/zend_language_scanner.l" { yy_push_state(ST_LOOKING_FOR_VARNAME); RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES); } -#line 1481 "Zend/zend_language_scanner.c" +#line 1493 "Zend/zend_language_scanner.c" yy30: YYDEBUG(30, *YYCURSOR); ++YYCURSOR; YYDEBUG(31, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 2196 "Zend/zend_language_scanner.l" +#line 2233 "Zend/zend_language_scanner.l" { yy_push_state(ST_IN_SCRIPTING); yyless(1); RETURN_TOKEN(T_CURLY_OPEN); } -#line 1493 "Zend/zend_language_scanner.c" +#line 1505 "Zend/zend_language_scanner.c" yy32: YYDEBUG(32, *YYCURSOR); yych = *++YYCURSOR; @@ -1503,13 +1515,13 @@ yy34: ++YYCURSOR; YYDEBUG(35, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1897 "Zend/zend_language_scanner.l" +#line 1913 "Zend/zend_language_scanner.l" { yyless(yyleng - 1); yy_push_state(ST_VAR_OFFSET); RETURN_TOKEN_WITH_STR(T_VARIABLE, 1); } -#line 1513 "Zend/zend_language_scanner.c" +#line 1525 "Zend/zend_language_scanner.c" yy36: YYDEBUG(36, *YYCURSOR); yych = *++YYCURSOR; @@ -1527,13 +1539,13 @@ yy37: ++YYCURSOR; YYDEBUG(38, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1889 "Zend/zend_language_scanner.l" +#line 1905 "Zend/zend_language_scanner.l" { yyless(yyleng - 3); yy_push_state(ST_LOOKING_FOR_PROPERTY); RETURN_TOKEN_WITH_STR(T_VARIABLE, 1); } -#line 1537 "Zend/zend_language_scanner.c" +#line 1549 "Zend/zend_language_scanner.c" } /* *********************************** */ yyc_ST_DOUBLE_QUOTES: @@ -1586,7 +1598,7 @@ yyc_ST_DOUBLE_QUOTES: yy42: YYDEBUG(42, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 2214 "Zend/zend_language_scanner.l" +#line 2251 "Zend/zend_language_scanner.l" { if (GET_DOUBLE_QUOTES_SCANNED_LENGTH()) { YYCURSOR += GET_DOUBLE_QUOTES_SCANNED_LENGTH() - 1; @@ -1639,18 +1651,18 @@ double_quotes_scan_done: RETURN_TOKEN(T_ERROR); } } -#line 1643 "Zend/zend_language_scanner.c" +#line 1655 "Zend/zend_language_scanner.c" yy43: YYDEBUG(43, *YYCURSOR); ++YYCURSOR; YYDEBUG(44, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 2203 "Zend/zend_language_scanner.l" +#line 2240 "Zend/zend_language_scanner.l" { BEGIN(ST_IN_SCRIPTING); RETURN_TOKEN('"'); } -#line 1654 "Zend/zend_language_scanner.c" +#line 1666 "Zend/zend_language_scanner.c" yy45: YYDEBUG(45, *YYCURSOR); yych = *++YYCURSOR; @@ -1689,34 +1701,34 @@ yy47: yy49: YYDEBUG(49, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1903 "Zend/zend_language_scanner.l" +#line 1919 "Zend/zend_language_scanner.l" { RETURN_TOKEN_WITH_STR(T_VARIABLE, 1); } -#line 1697 "Zend/zend_language_scanner.c" +#line 1709 "Zend/zend_language_scanner.c" yy50: YYDEBUG(50, *YYCURSOR); ++YYCURSOR; YYDEBUG(51, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1609 "Zend/zend_language_scanner.l" +#line 1619 "Zend/zend_language_scanner.l" { yy_push_state(ST_LOOKING_FOR_VARNAME); RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES); } -#line 1708 "Zend/zend_language_scanner.c" +#line 1720 "Zend/zend_language_scanner.c" yy52: YYDEBUG(52, *YYCURSOR); ++YYCURSOR; YYDEBUG(53, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 2196 "Zend/zend_language_scanner.l" +#line 2233 "Zend/zend_language_scanner.l" { yy_push_state(ST_IN_SCRIPTING); yyless(1); RETURN_TOKEN(T_CURLY_OPEN); } -#line 1720 "Zend/zend_language_scanner.c" +#line 1732 "Zend/zend_language_scanner.c" yy54: YYDEBUG(54, *YYCURSOR); yych = *++YYCURSOR; @@ -1730,13 +1742,13 @@ yy56: ++YYCURSOR; YYDEBUG(57, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1897 "Zend/zend_language_scanner.l" +#line 1913 "Zend/zend_language_scanner.l" { yyless(yyleng - 1); yy_push_state(ST_VAR_OFFSET); RETURN_TOKEN_WITH_STR(T_VARIABLE, 1); } -#line 1740 "Zend/zend_language_scanner.c" +#line 1752 "Zend/zend_language_scanner.c" yy58: YYDEBUG(58, *YYCURSOR); yych = *++YYCURSOR; @@ -1754,13 +1766,13 @@ yy59: ++YYCURSOR; YYDEBUG(60, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1889 "Zend/zend_language_scanner.l" +#line 1905 "Zend/zend_language_scanner.l" { yyless(yyleng - 3); yy_push_state(ST_LOOKING_FOR_PROPERTY); RETURN_TOKEN_WITH_STR(T_VARIABLE, 1); } -#line 1764 "Zend/zend_language_scanner.c" +#line 1776 "Zend/zend_language_scanner.c" } /* *********************************** */ yyc_ST_END_HEREDOC: @@ -1771,7 +1783,7 @@ yyc_ST_END_HEREDOC: ++YYCURSOR; YYDEBUG(64, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 2182 "Zend/zend_language_scanner.l" +#line 2219 "Zend/zend_language_scanner.l" { zend_heredoc_label *heredoc_label = zend_ptr_stack_pop(&SCNG(heredoc_label_stack)); @@ -1784,7 +1796,7 @@ yyc_ST_END_HEREDOC: BEGIN(ST_IN_SCRIPTING); RETURN_TOKEN(T_END_HEREDOC); } -#line 1788 "Zend/zend_language_scanner.c" +#line 1800 "Zend/zend_language_scanner.c" /* *********************************** */ yyc_ST_HEREDOC: { @@ -1832,7 +1844,7 @@ yyc_ST_HEREDOC: yy68: YYDEBUG(68, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 2314 "Zend/zend_language_scanner.l" +#line 2351 "Zend/zend_language_scanner.l" { int newline = 0; @@ -1909,7 +1921,7 @@ heredoc_scan_done: RETURN_TOKEN(T_ERROR); } } -#line 1913 "Zend/zend_language_scanner.c" +#line 1925 "Zend/zend_language_scanner.c" yy69: YYDEBUG(69, *YYCURSOR); yych = *++YYCURSOR; @@ -1948,34 +1960,34 @@ yy71: yy73: YYDEBUG(73, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1903 "Zend/zend_language_scanner.l" +#line 1919 "Zend/zend_language_scanner.l" { RETURN_TOKEN_WITH_STR(T_VARIABLE, 1); } -#line 1956 "Zend/zend_language_scanner.c" +#line 1968 "Zend/zend_language_scanner.c" yy74: YYDEBUG(74, *YYCURSOR); ++YYCURSOR; YYDEBUG(75, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1609 "Zend/zend_language_scanner.l" +#line 1619 "Zend/zend_language_scanner.l" { yy_push_state(ST_LOOKING_FOR_VARNAME); RETURN_TOKEN(T_DOLLAR_OPEN_CURLY_BRACES); } -#line 1967 "Zend/zend_language_scanner.c" +#line 1979 "Zend/zend_language_scanner.c" yy76: YYDEBUG(76, *YYCURSOR); ++YYCURSOR; YYDEBUG(77, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 2196 "Zend/zend_language_scanner.l" +#line 2233 "Zend/zend_language_scanner.l" { yy_push_state(ST_IN_SCRIPTING); yyless(1); RETURN_TOKEN(T_CURLY_OPEN); } -#line 1979 "Zend/zend_language_scanner.c" +#line 1991 "Zend/zend_language_scanner.c" yy78: YYDEBUG(78, *YYCURSOR); yych = *++YYCURSOR; @@ -1989,13 +2001,13 @@ yy80: ++YYCURSOR; YYDEBUG(81, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1897 "Zend/zend_language_scanner.l" +#line 1913 "Zend/zend_language_scanner.l" { yyless(yyleng - 1); yy_push_state(ST_VAR_OFFSET); RETURN_TOKEN_WITH_STR(T_VARIABLE, 1); } -#line 1999 "Zend/zend_language_scanner.c" +#line 2011 "Zend/zend_language_scanner.c" yy82: YYDEBUG(82, *YYCURSOR); yych = *++YYCURSOR; @@ -2013,13 +2025,13 @@ yy83: ++YYCURSOR; YYDEBUG(84, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1889 "Zend/zend_language_scanner.l" +#line 1905 "Zend/zend_language_scanner.l" { yyless(yyleng - 3); yy_push_state(ST_LOOKING_FOR_PROPERTY); RETURN_TOKEN_WITH_STR(T_VARIABLE, 1); } -#line 2023 "Zend/zend_language_scanner.c" +#line 2035 "Zend/zend_language_scanner.c" } /* *********************************** */ yyc_ST_IN_SCRIPTING: @@ -2188,7 +2200,7 @@ yy87: ++YYCURSOR; YYDEBUG(88, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 2449 "Zend/zend_language_scanner.l" +#line 2486 "Zend/zend_language_scanner.l" { if (YYCURSOR > YYLIMIT) { RETURN_TOKEN(END); @@ -2197,7 +2209,7 @@ yy87: zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE); goto restart; } -#line 2201 "Zend/zend_language_scanner.c" +#line 2213 "Zend/zend_language_scanner.c" yy89: YYDEBUG(89, *YYCURSOR); ++YYCURSOR; @@ -2209,11 +2221,11 @@ yy89: } YYDEBUG(91, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1315 "Zend/zend_language_scanner.l" +#line 1325 "Zend/zend_language_scanner.l" { goto return_whitespace; } -#line 2217 "Zend/zend_language_scanner.c" +#line 2229 "Zend/zend_language_scanner.c" yy92: YYDEBUG(92, *YYCURSOR); ++YYCURSOR; @@ -2221,17 +2233,17 @@ yy92: yy93: YYDEBUG(93, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1598 "Zend/zend_language_scanner.l" +#line 1608 "Zend/zend_language_scanner.l" { RETURN_TOKEN(yytext[0]); } -#line 2229 "Zend/zend_language_scanner.c" +#line 2241 "Zend/zend_language_scanner.c" yy94: YYDEBUG(94, *YYCURSOR); ++YYCURSOR; YYDEBUG(95, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 2082 "Zend/zend_language_scanner.l" +#line 2119 "Zend/zend_language_scanner.l" { int bprefix = (yytext[0] != '"') ? 1 : 0; @@ -2276,13 +2288,13 @@ yy94: BEGIN(ST_DOUBLE_QUOTES); RETURN_TOKEN('"'); } -#line 2280 "Zend/zend_language_scanner.c" +#line 2292 "Zend/zend_language_scanner.c" yy96: YYDEBUG(96, *YYCURSOR); ++YYCURSOR; YYDEBUG(97, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1930 "Zend/zend_language_scanner.l" +#line 1946 "Zend/zend_language_scanner.l" { while (YYCURSOR < YYLIMIT) { switch (*YYCURSOR++) { @@ -2314,7 +2326,7 @@ yy96: } RETURN_TOKEN(T_COMMENT); } -#line 2318 "Zend/zend_language_scanner.c" +#line 2330 "Zend/zend_language_scanner.c" yy98: YYDEBUG(98, *YYCURSOR); yych = *++YYCURSOR; @@ -2345,7 +2357,7 @@ yy101: ++YYCURSOR; YYDEBUG(102, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 2013 "Zend/zend_language_scanner.l" +#line 2029 "Zend/zend_language_scanner.l" { register char *s, *t; char *end; @@ -2372,37 +2384,58 @@ yy101: } } + if (yyleng-bprefix-2 <= 1) { + if (yyleng-bprefix-2 < 1) { + ZVAL_EMPTY_STRING(zendlval); + } else { + zend_uchar c = (zend_uchar)*(yytext+bprefix+1); + if (c == '\n' || c == '\r') { + CG(zend_lineno)++; + } + ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR(c)); + } + goto skip_escape_conversion; + } ZVAL_STRINGL(zendlval, yytext+bprefix+1, yyleng-bprefix-2); /* convert escape sequences */ - s = t = Z_STRVAL_P(zendlval); + s = Z_STRVAL_P(zendlval); end = s+Z_STRLEN_P(zendlval); + while (1) { + if (UNEXPECTED(*s=='\\')) { + break; + } + if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) { + CG(zend_lineno)++; + } + s++; + if (s == end) { + goto skip_escape_conversion; + } + } + + t = s; while (s YYLIMIT) { RETURN_TOKEN(END); @@ -7633,13 +7666,13 @@ yy792: zend_error(E_COMPILE_WARNING,"Unexpected character in input: '%c' (ASCII=%d) state=%d", yytext[0], yytext[0], YYSTATE); goto restart; } -#line 7637 "Zend/zend_language_scanner.c" +#line 7670 "Zend/zend_language_scanner.c" yy794: YYDEBUG(794, *YYCURSOR); ++YYCURSOR; YYDEBUG(795, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1917 "Zend/zend_language_scanner.l" +#line 1933 "Zend/zend_language_scanner.l" { /* Invalid rule to return a more explicit parse error with proper line number */ yyless(0); @@ -7647,19 +7680,19 @@ yy794: ZVAL_NULL(zendlval); RETURN_TOKEN_WITH_VAL(T_ENCAPSED_AND_WHITESPACE); } -#line 7651 "Zend/zend_language_scanner.c" +#line 7684 "Zend/zend_language_scanner.c" yy796: YYDEBUG(796, *YYCURSOR); ++YYCURSOR; yy797: YYDEBUG(797, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1912 "Zend/zend_language_scanner.l" +#line 1928 "Zend/zend_language_scanner.l" { /* Only '[' or '-' can be valid, but returning other tokens will allow a more explicit parse error */ RETURN_TOKEN(yytext[0]); } -#line 7663 "Zend/zend_language_scanner.c" +#line 7696 "Zend/zend_language_scanner.c" yy798: YYDEBUG(798, *YYCURSOR); yych = *++YYCURSOR; @@ -7694,7 +7727,7 @@ yy799: yy800: YYDEBUG(800, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1746 "Zend/zend_language_scanner.l" +#line 1756 "Zend/zend_language_scanner.l" { /* Offset could be treated as a long */ if (yyleng < MAX_LENGTH_OF_LONG - 1 || (yyleng == MAX_LENGTH_OF_LONG - 1 && strcmp(yytext, long_min_digits) < 0)) { char *end; @@ -7710,7 +7743,7 @@ string: } RETURN_TOKEN_WITH_VAL(T_NUM_STRING); } -#line 7714 "Zend/zend_language_scanner.c" +#line 7747 "Zend/zend_language_scanner.c" yy801: YYDEBUG(801, *YYCURSOR); ++YYCURSOR; @@ -7732,22 +7765,22 @@ yy803: } YYDEBUG(805, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1925 "Zend/zend_language_scanner.l" +#line 1941 "Zend/zend_language_scanner.l" { RETURN_TOKEN_WITH_STR(T_STRING, 0); } -#line 7740 "Zend/zend_language_scanner.c" +#line 7773 "Zend/zend_language_scanner.c" yy806: YYDEBUG(806, *YYCURSOR); ++YYCURSOR; YYDEBUG(807, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1907 "Zend/zend_language_scanner.l" +#line 1923 "Zend/zend_language_scanner.l" { yy_pop_state(); RETURN_TOKEN(']'); } -#line 7751 "Zend/zend_language_scanner.c" +#line 7784 "Zend/zend_language_scanner.c" yy808: YYDEBUG(808, *YYCURSOR); ++YYCURSOR; @@ -7772,11 +7805,11 @@ yy808: yy810: YYDEBUG(810, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1903 "Zend/zend_language_scanner.l" +#line 1919 "Zend/zend_language_scanner.l" { RETURN_TOKEN_WITH_STR(T_VARIABLE, 1); } -#line 7780 "Zend/zend_language_scanner.c" +#line 7813 "Zend/zend_language_scanner.c" yy811: YYDEBUG(811, *YYCURSOR); ++YYCURSOR; @@ -7788,12 +7821,16 @@ yy811: yy813: YYDEBUG(813, *YYCURSOR); yyleng = YYCURSOR - SCNG(yy_text); -#line 1762 "Zend/zend_language_scanner.l" +#line 1772 "Zend/zend_language_scanner.l" { /* Offset must be treated as a string */ - ZVAL_STRINGL(zendlval, yytext, yyleng); + if (yyleng == 1) { + ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR((zend_uchar)*(yytext))); + } else { + ZVAL_STRINGL(zendlval, yytext, yyleng); + } RETURN_TOKEN_WITH_VAL(T_NUM_STRING); } -#line 7797 "Zend/zend_language_scanner.c" +#line 7834 "Zend/zend_language_scanner.c" yy814: YYDEBUG(814, *YYCURSOR); yych = *++YYCURSOR; @@ -7833,7 +7870,7 @@ yy819: goto yy813; } } -#line 2458 "Zend/zend_language_scanner.l" +#line 2495 "Zend/zend_language_scanner.l" emit_token_with_str: diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l index e5214e66a8..bb87d818f9 100644 --- a/Zend/zend_language_scanner.l +++ b/Zend/zend_language_scanner.l @@ -874,6 +874,8 @@ ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter SCNG(output_filter)((unsigned char **)&s, &sz, (unsigned char *)yytext, (size_t)yyleng); \ ZVAL_STRINGL(zendlval, s, sz); \ efree(s); \ + } else if (yyleng == 1) { \ + ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR((zend_uchar)*(yytext))); \ } else { \ ZVAL_STRINGL(zendlval, yytext, yyleng); \ } @@ -883,11 +885,38 @@ static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quot register char *s, *t; char *end; + if (len <= 1) { + if (len < 1) { + ZVAL_EMPTY_STRING(zendlval); + } else { + zend_uchar c = (zend_uchar)*str; + if (c == '\n' || c == '\r') { + CG(zend_lineno)++; + } + ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR(c)); + } + goto skip_escape_conversion; + } + ZVAL_STRINGL(zendlval, str, len); /* convert escape sequences */ - s = t = Z_STRVAL_P(zendlval); + s = Z_STRVAL_P(zendlval); end = s+Z_STRLEN_P(zendlval); + while (1) { + if (UNEXPECTED(*s=='\\')) { + break; + } + if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) { + CG(zend_lineno)++; + } + s++; + if (s == end) { + goto skip_escape_conversion; + } + } + + t = s; while (s> 6) + 0xC0; *t++ = (codepoint & 0x3F) + 0x80; } else if (codepoint <= 0xFFFF) { - byte_len = 3; *t++ = (codepoint >> 12) + 0xE0; *t++ = ((codepoint >> 6) & 0x3F) + 0x80; *t++ = (codepoint & 0x3F) + 0x80; } else if (codepoint <= 0x10FFFF) { - byte_len = 4; *t++ = (codepoint >> 18) + 0xF0; *t++ = ((codepoint >> 12) & 0x3F) + 0x80; *t++ = ((codepoint >> 6) & 0x3F) + 0x80; *t++ = (codepoint & 0x3F) + 0x80; } - - Z_STRLEN_P(zendlval) -= 2; /* \u */ - Z_STRLEN_P(zendlval) -= (len - byte_len); } break; default: @@ -1049,13 +1059,10 @@ static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quot char octal_buf[4] = { 0, 0, 0, 0 }; octal_buf[0] = *s; - Z_STRLEN_P(zendlval)--; if (ZEND_IS_OCT(*(s+1))) { octal_buf[1] = *(++s); - Z_STRLEN_P(zendlval)--; if (ZEND_IS_OCT(*(s+1))) { octal_buf[2] = *(++s); - Z_STRLEN_P(zendlval)--; } } if (octal_buf[2] && @@ -1081,6 +1088,9 @@ static int zend_scan_escape_string(zval *zendlval, char *str, int len, char quot s++; } *t = 0; + Z_STRLEN_P(zendlval) = t - Z_STRVAL_P(zendlval); + +skip_escape_conversion: if (SCNG(output_filter)) { size_t sz = 0; unsigned char *str; @@ -1760,7 +1770,11 @@ string: } {LNUM}|{HNUM}|{BNUM} { /* Offset must be treated as a string */ - ZVAL_STRINGL(zendlval, yytext, yyleng); + if (yyleng == 1) { + ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR((zend_uchar)*(yytext))); + } else { + ZVAL_STRINGL(zendlval, yytext, yyleng); + } RETURN_TOKEN_WITH_VAL(T_NUM_STRING); } @@ -1875,8 +1889,10 @@ inline_char_handler: if (readsize < yyleng) { yyless(readsize); } + } else if (yyleng == 1) { + ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR((zend_uchar)*yytext)); } else { - ZVAL_STRINGL(zendlval, yytext, yyleng); + ZVAL_STRINGL(zendlval, yytext, yyleng); } HANDLE_NEWLINES(yytext, yyleng); RETURN_TOKEN_WITH_VAL(T_INLINE_HTML); @@ -2036,37 +2052,58 @@ inline_char_handler: } } + if (yyleng-bprefix-2 <= 1) { + if (yyleng-bprefix-2 < 1) { + ZVAL_EMPTY_STRING(zendlval); + } else { + zend_uchar c = (zend_uchar)*(yytext+bprefix+1); + if (c == '\n' || c == '\r') { + CG(zend_lineno)++; + } + ZVAL_INTERNED_STR(zendlval, ZSTR_CHAR(c)); + } + goto skip_escape_conversion; + } ZVAL_STRINGL(zendlval, yytext+bprefix+1, yyleng-bprefix-2); /* convert escape sequences */ - s = t = Z_STRVAL_P(zendlval); + s = Z_STRVAL_P(zendlval); end = s+Z_STRLEN_P(zendlval); + while (1) { + if (UNEXPECTED(*s=='\\')) { + break; + } + if (*s == '\n' || (*s == '\r' && (*(s+1) != '\n'))) { + CG(zend_lineno)++; + } + s++; + if (s == end) { + goto skip_escape_conversion; + } + } + + t = s; while (s