From 47cf18ba4ef9fb9df6c3affff8f2a3f5a3f930a8 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 15 Feb 2020 17:54:02 +0100 Subject: [PATCH] Don't include trailing newline in comment token Don't include a trailing newline in T_COMMENT tokens; instead leave it for a following T_WHITESPACE token. The newline does not belong to the comment logically, and including it makes for an ugly special case, as other tokens do not include trailing newlines. Whitespace-sensitive tooling will want to either forward or backward emulate this change. Closes GH-5182. --- UPGRADING | 6 + Zend/zend_language_scanner.l | 6 +- ext/tokenizer/tests/PhpToken_methods.phpt | 66 ++++----- .../tests/token_get_all_variation9.phpt | 132 ++++++++++-------- 4 files changed, 116 insertions(+), 94 deletions(-) diff --git a/UPGRADING b/UPGRADING index cf96463b49..ed8690bf82 100644 --- a/UPGRADING +++ b/UPGRADING @@ -470,6 +470,12 @@ PHP 8.0 UPGRADE NOTES . The $use_include_path parameter, which was not used internally, has been removed from tidy_repair_string(). +- Tokenizer: + . T_COMMENT tokens will no longer include a trailing newline. The newline will + instead be part of a following T_WHITESPACE token. It should be noted that + T_COMMENT is not always followed by whitespace; it may also be followed by + T_CLOSE_TAG or end-of-file. + - XML: . xml_parser_create(_ns) will now return an XmlParser object rather than a resource. 
Return value checks using is_resource() should be replaced with diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l index ffb5158938..4580b858e0 100644 --- a/Zend/zend_language_scanner.l +++ b/Zend/zend_language_scanner.l @@ -2293,12 +2293,8 @@ inline_char_handler: while (YYCURSOR < YYLIMIT) { switch (*YYCURSOR++) { case '\r': - if (*YYCURSOR == '\n') { - YYCURSOR++; - } - /* fall through */ case '\n': - CG(zend_lineno)++; + YYCURSOR--; break; case '?': if (*YYCURSOR == '>') { diff --git a/ext/tokenizer/tests/PhpToken_methods.phpt b/ext/tokenizer/tests/PhpToken_methods.phpt index 3e08f3c39e..570e880473 100644 --- a/ext/tokenizer/tests/PhpToken_methods.phpt +++ b/ext/tokenizer/tests/PhpToken_methods.phpt @@ -22,50 +22,51 @@ foreach ($tokens as $i => $token) { } // is() variations +$token = $tokens[5]; echo "\nSuccess:\n"; -var_dump($tokens[4]->is(T_FUNCTION)); -var_dump($tokens[4]->is('function')); -var_dump($tokens[4]->is(['class', T_FUNCTION])); -var_dump($tokens[4]->is([T_CLASS, 'function'])); +var_dump($token->is(T_FUNCTION)); +var_dump($token->is('function')); +var_dump($token->is(['class', T_FUNCTION])); +var_dump($token->is([T_CLASS, 'function'])); echo "\nFailure:\n"; -var_dump($tokens[4]->is(T_CLASS)); -var_dump($tokens[4]->is('class')); -var_dump($tokens[4]->is(['class', T_TRAIT])); -var_dump($tokens[4]->is([T_CLASS, 'trait'])); +var_dump($token->is(T_CLASS)); +var_dump($token->is('class')); +var_dump($token->is(['class', T_TRAIT])); +var_dump($token->is([T_CLASS, 'trait'])); echo "\nError:\n"; try { - $tokens[4]->is(3.141); + $token->is(3.141); } catch (TypeError $e) { echo $e->getMessage(), "\n"; } try { - $tokens[4]->is([3.141]); + $token->is([3.141]); } catch (TypeError $e) { echo $e->getMessage(), "\n"; } -unset($tokens[4]->id); -unset($tokens[4]->text); +unset($token->id); +unset($token->text); try { - $tokens[4]->is(T_FUNCTION); + $token->is(T_FUNCTION); } catch (Error $e) { echo $e->getMessage(), "\n"; } try { - 
$tokens[4]->is('function'); + $token->is('function'); } catch (Error $e) { echo $e->getMessage(), "\n"; } try { - $tokens[4]->is([T_FUNCTION]); + $token->is([T_FUNCTION]); } catch (Error $e) { echo $e->getMessage(), "\n"; } try { - $tokens[4]->is(['function']); + $token->is(['function']); } catch (Error $e) { echo $e->getMessage(), "\n"; } @@ -78,22 +79,23 @@ var_dump($token->getTokenName()); --EXPECT-- [ 0] T_OPEN_TAG ignorable [ 1] T_COMMENT ignorable -[ 2] T_DOC_COMMENT ignorable -[ 3] T_WHITESPACE ignorable -[ 4] T_FUNCTION meaningful -[ 5] T_WHITESPACE ignorable -[ 6] T_STRING meaningful -[ 7] ( meaningful -[ 8] ) meaningful -[ 9] T_WHITESPACE ignorable -[10] { meaningful -[11] T_WHITESPACE ignorable -[12] T_ECHO meaningful -[13] T_WHITESPACE ignorable -[14] T_CONSTANT_ENCAPSED_STRING meaningful -[15] ; meaningful -[16] T_WHITESPACE ignorable -[17] } meaningful +[ 2] T_WHITESPACE ignorable +[ 3] T_DOC_COMMENT ignorable +[ 4] T_WHITESPACE ignorable +[ 5] T_FUNCTION meaningful +[ 6] T_WHITESPACE ignorable +[ 7] T_STRING meaningful +[ 8] ( meaningful +[ 9] ) meaningful +[10] T_WHITESPACE ignorable +[11] { meaningful +[12] T_WHITESPACE ignorable +[13] T_ECHO meaningful +[14] T_WHITESPACE ignorable +[15] T_CONSTANT_ENCAPSED_STRING meaningful +[16] ; meaningful +[17] T_WHITESPACE ignorable +[18] } meaningful Success: bool(true) diff --git a/ext/tokenizer/tests/token_get_all_variation9.phpt b/ext/tokenizer/tests/token_get_all_variation9.phpt index 082525a176..1e906c416b 100644 --- a/ext/tokenizer/tests/token_get_all_variation9.phpt +++ b/ext/tokenizer/tests/token_get_all_variation9.phpt @@ -40,7 +40,7 @@ echo "Done" ?> --EXPECTF-- *** Testing token_get_all() : 'source' string with different comments *** -array(50) { +array(52) { [0]=> array(3) { [0]=> @@ -78,12 +78,21 @@ array(50) { [0]=> int(%d) [1]=> - string(13) "// int value -" + string(12) "// int value" [2]=> int(6) } [4]=> + array(3) { + [0]=> + int(%d) + [1]=> + string(1) " +" + [2]=> + int(6) + } + [5]=> 
array(3) { [0]=> int(%d) @@ -92,7 +101,7 @@ array(50) { [2]=> int(7) } - [5]=> + [6]=> array(3) { [0]=> int(%d) @@ -101,9 +110,9 @@ array(50) { [2]=> int(7) } - [6]=> - string(1) "=" [7]=> + string(1) "=" + [8]=> array(3) { [0]=> int(%d) @@ -112,7 +121,7 @@ array(50) { [2]=> int(7) } - [8]=> + [9]=> array(3) { [0]=> int(%d) @@ -121,9 +130,9 @@ array(50) { [2]=> int(7) } - [9]=> - string(1) ";" [10]=> + string(1) ";" + [11]=> array(3) { [0]=> int(%d) @@ -133,7 +142,7 @@ array(50) { [2]=> int(7) } - [11]=> + [12]=> array(3) { [0]=> int(%d) @@ -142,7 +151,7 @@ array(50) { [2]=> int(8) } - [12]=> + [13]=> array(3) { [0]=> int(%d) @@ -151,9 +160,9 @@ array(50) { [2]=> int(8) } - [13]=> - string(1) "=" [14]=> + string(1) "=" + [15]=> array(3) { [0]=> int(%d) @@ -162,7 +171,7 @@ array(50) { [2]=> int(8) } - [15]=> + [16]=> array(3) { [0]=> int(%d) @@ -171,9 +180,9 @@ array(50) { [2]=> int(8) } - [16]=> - string(1) ";" [17]=> + string(1) ";" + [18]=> array(3) { [0]=> int(%d) @@ -183,7 +192,7 @@ array(50) { [2]=> int(8) } - [18]=> + [19]=> array(3) { [0]=> int(%d) @@ -192,7 +201,7 @@ array(50) { [2]=> int(9) } - [19]=> + [20]=> array(3) { [0]=> int(%d) @@ -201,9 +210,9 @@ array(50) { [2]=> int(9) } - [20]=> - string(1) "=" [21]=> + string(1) "=" + [22]=> array(3) { [0]=> int(%d) @@ -212,7 +221,7 @@ array(50) { [2]=> int(9) } - [22]=> + [23]=> array(3) { [0]=> int(%d) @@ -221,9 +230,9 @@ array(50) { [2]=> int(9) } - [23]=> - string(1) ";" [24]=> + string(1) ";" + [25]=> array(3) { [0]=> int(%d) @@ -232,27 +241,27 @@ array(50) { [2]=> int(9) } - [25]=> + [26]=> array(3) { [0]=> int(%d) [1]=> - string(14) "// bool value -" + string(13) "// bool value" [2]=> int(9) } - [26]=> + [27]=> array(3) { [0]=> int(%d) [1]=> - string(1) " + string(2) " + " [2]=> - int(10) + int(9) } - [27]=> + [28]=> array(3) { [0]=> int(%d) @@ -264,7 +273,7 @@ array(50) { [2]=> int(11) } - [28]=> + [29]=> array(3) { [0]=> int(%d) @@ -274,7 +283,7 @@ array(50) { [2]=> int(14) } - [29]=> + [30]=> array(3) 
{ [0]=> int(%d) @@ -283,7 +292,7 @@ array(50) { [2]=> int(15) } - [30]=> + [31]=> array(3) { [0]=> int(%d) @@ -292,9 +301,9 @@ array(50) { [2]=> int(15) } - [31]=> - string(1) "=" [32]=> + string(1) "=" + [33]=> array(3) { [0]=> int(%d) @@ -303,7 +312,7 @@ array(50) { [2]=> int(15) } - [33]=> + [34]=> array(3) { [0]=> int(%d) @@ -312,7 +321,7 @@ array(50) { [2]=> int(15) } - [34]=> + [35]=> array(3) { [0]=> int(%d) @@ -321,9 +330,9 @@ array(50) { [2]=> int(15) } - [35]=> - string(1) "+" [36]=> + string(1) "+" + [37]=> array(3) { [0]=> int(%d) @@ -332,7 +341,7 @@ array(50) { [2]=> int(15) } - [37]=> + [38]=> array(3) { [0]=> int(%d) @@ -341,9 +350,9 @@ array(50) { [2]=> int(15) } - [38]=> - string(1) ";" [39]=> + string(1) ";" + [40]=> array(3) { [0]=> int(%d) @@ -353,7 +362,7 @@ array(50) { [2]=> int(15) } - [40]=> + [41]=> array(3) { [0]=> int(%d) @@ -362,9 +371,9 @@ array(50) { [2]=> int(16) } - [41]=> - string(1) "(" [42]=> + string(1) "(" + [43]=> array(3) { [0]=> int(%d) @@ -373,11 +382,11 @@ array(50) { [2]=> int(16) } - [43]=> - string(1) ")" [44]=> - string(1) ";" + string(1) ")" [45]=> + string(1) ";" + [46]=> array(3) { [0]=> int(%d) @@ -386,37 +395,46 @@ array(50) { [2]=> int(16) } - [46]=> + [47]=> array(3) { [0]=> int(%d) [1]=> - string(20) "# expected: int(%d) -" + string(19) "# expected: int(30)" [2]=> int(16) } - [47]=> + [48]=> array(3) { [0]=> int(%d) [1]=> - string(1) " + string(2) " + " [2]=> - int(17) + int(16) } - [48]=> + [49]=> + array(3) { + [0]=> + int(%d) + [1]=> + string(16) "# end of program" + [2]=> + int(18) + } + [50]=> array(3) { [0]=> int(%d) [1]=> - string(17) "# end of program + string(1) " " [2]=> int(18) } - [49]=> + [51]=> array(3) { [0]=> int(%d) -- 2.50.1