granicus.if.org Git - php/commitdiff
Implement numeric literal separators
author Theodore Brown <theodorejb@outlook.com>
Thu, 2 May 2019 17:10:19 +0000 (12:10 -0500)
committer Nikita Popov <nikita.ppv@gmail.com>
Fri, 14 Jun 2019 09:37:04 +0000 (11:37 +0200)
RFC: https://wiki.php.net/rfc/numeric_literal_separator

UPGRADING
Zend/tests/numeric_literal_separator_001.phpt [new file with mode: 0644]
Zend/tests/numeric_literal_separator_002.phpt [new file with mode: 0644]
Zend/tests/numeric_literal_separator_003.phpt [new file with mode: 0644]
Zend/tests/numeric_literal_separator_004.phpt [new file with mode: 0644]
Zend/tests/numeric_literal_separator_005.phpt [new file with mode: 0644]
Zend/tests/numeric_literal_separator_006.phpt [new file with mode: 0644]
Zend/tests/numeric_literal_separator_007.phpt [new file with mode: 0644]
Zend/tests/numeric_literal_separator_008.phpt [new file with mode: 0644]
Zend/tests/numeric_literal_separator_009.phpt [new file with mode: 0644]
Zend/zend_language_scanner.l

index 66be319cb9b2d305bc12b33ec9c7f0b3c1fd252c..1eaf8fc3d483306e57db79b6e484761e77e4d0d0 100644 (file)
--- a/UPGRADING
+++ b/UPGRADING
@@ -186,6 +186,15 @@ PHP 7.4 UPGRADE NOTES
 
     RFC: https://wiki.php.net/rfc/spread_operator_for_array
 
+  . Added support for underscore separators in numeric literals. Some examples:
+
+        6.674_083e-11; // float
+        299_792_458;   // decimal
+        0xCAFE_F00D;   // hexadecimal
+        0b0101_1111;   // binary
+
+    RFC: https://wiki.php.net/rfc/numeric_literal_separator
+
   . Support for WeakReferences has been added.
     RFC: https://wiki.php.net/rfc/weakrefs
 
diff --git a/Zend/tests/numeric_literal_separator_001.phpt b/Zend/tests/numeric_literal_separator_001.phpt
new file mode 100644 (file)
index 0000000..866bd36
--- /dev/null
@@ -0,0 +1,27 @@
+--TEST--
+Valid use of numeric literal separator
+--FILE--
+<?php
+var_dump(299_792_458 === 299792458);
+var_dump(135_00 === 13500);
+var_dump(96_485.332_12 === 96485.33212);
+var_dump(6.626_070_15e-34 === 6.62607015e-34);
+var_dump(6.674_083e-11 === 6.674083e-11);
+var_dump(0xCAFE_F00D === 0xCAFEF00D);
+var_dump(0x54_4A_42 === 0x544A42);
+var_dump(0b0101_1111 === 0b01011111);
+var_dump(0b01_0000_10 === 0b01000010);
+var_dump(0137_041 === 0137041);
+var_dump(0_124 === 0124);
+--EXPECT--
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
+bool(true)
diff --git a/Zend/tests/numeric_literal_separator_002.phpt b/Zend/tests/numeric_literal_separator_002.phpt
new file mode 100644 (file)
index 0000000..984438f
--- /dev/null
@@ -0,0 +1,7 @@
+--TEST--
+Invalid use: trailing underscore
+--FILE--
+<?php
+100_;
+--EXPECTF--
+Parse error: syntax error, unexpected '_' (T_STRING) in %s on line %d
diff --git a/Zend/tests/numeric_literal_separator_003.phpt b/Zend/tests/numeric_literal_separator_003.phpt
new file mode 100644 (file)
index 0000000..e0cd716
--- /dev/null
@@ -0,0 +1,7 @@
+--TEST--
+Invalid use: adjacent underscores
+--FILE--
+<?php
+10__0;
+--EXPECTF--
+Parse error: syntax error, unexpected '__0' (T_STRING) in %s on line %d
diff --git a/Zend/tests/numeric_literal_separator_004.phpt b/Zend/tests/numeric_literal_separator_004.phpt
new file mode 100644 (file)
index 0000000..6db8f8e
--- /dev/null
@@ -0,0 +1,7 @@
+--TEST--
+Invalid use: underscore left of period
+--FILE--
+<?php
+100_.0;
+--EXPECTF--
+Parse error: syntax error, unexpected '_' (T_STRING) in %s on line %d
diff --git a/Zend/tests/numeric_literal_separator_005.phpt b/Zend/tests/numeric_literal_separator_005.phpt
new file mode 100644 (file)
index 0000000..4b454e2
--- /dev/null
@@ -0,0 +1,7 @@
+--TEST--
+Invalid use: underscore right of period
+--FILE--
+<?php
+100._0;
+--EXPECTF--
+Parse error: syntax error, unexpected '_0' (T_STRING) in %s on line %d
diff --git a/Zend/tests/numeric_literal_separator_006.phpt b/Zend/tests/numeric_literal_separator_006.phpt
new file mode 100644 (file)
index 0000000..14bd290
--- /dev/null
@@ -0,0 +1,7 @@
+--TEST--
+Invalid use: underscore next to 0x
+--FILE--
+<?php
+0x_0123;
+--EXPECTF--
+Parse error: syntax error, unexpected 'x_0123' (T_STRING) in %s on line %d
diff --git a/Zend/tests/numeric_literal_separator_007.phpt b/Zend/tests/numeric_literal_separator_007.phpt
new file mode 100644 (file)
index 0000000..e74c7ed
--- /dev/null
@@ -0,0 +1,7 @@
+--TEST--
+Invalid use: underscore next to 0b
+--FILE--
+<?php
+0b_0101;
+--EXPECTF--
+Parse error: syntax error, unexpected 'b_0101' (T_STRING) in %s on line %d
diff --git a/Zend/tests/numeric_literal_separator_008.phpt b/Zend/tests/numeric_literal_separator_008.phpt
new file mode 100644 (file)
index 0000000..66f1d0f
--- /dev/null
@@ -0,0 +1,7 @@
+--TEST--
+Invalid use: underscore left of e
+--FILE--
+<?php
+1_e2;
+--EXPECTF--
+Parse error: syntax error, unexpected '_e2' (T_STRING) in %s on line %d
diff --git a/Zend/tests/numeric_literal_separator_009.phpt b/Zend/tests/numeric_literal_separator_009.phpt
new file mode 100644 (file)
index 0000000..c690f66
--- /dev/null
@@ -0,0 +1,7 @@
+--TEST--
+Invalid use: underscore right of e
+--FILE--
+<?php
+1e_2;
+--EXPECTF--
+Parse error: syntax error, unexpected 'e_2' (T_STRING) in %s on line %d
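diff --git a/Zend/zend_language_scanner.l b/Zend/zend_language_scanner.l
index 497b02230bbcb8fc78d3892ac86698620671df64..663431708b6b23d45ebe5212a2e3d7909762b50f 100644 (file)
--- a/Zend/zend_language_scanner.l
+++ b/Zend/zend_language_scanner.l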
@@ -120,6 +120,21 @@ do {                                                                                                                                                       \
 
 BEGIN_EXTERN_C()
 
+static void strip_underscores(char *str, int *len) /* Removes every '_' from the NUL-terminated string str in place; *len is decremented once per '_' removed. */
+{
+       char *src = str, *dest = str; /* src scans the input, dest is the next write position */
+       while (*src != '\0') {
+               if (*src != '_') {
+                       *dest = *src; /* keep this character */
+                       dest++;
+               } else {
+                       --(*len); /* dropped separator: result is one character shorter */
+               }
+               src++;
+       }
+       *dest = '\0'; /* terminate the compacted string */
+}
+
 static size_t encoding_filter_script_to_internal(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length)
 {
        const zend_encoding *internal_encoding = zend_multibyte_get_internal_encoding();
@@ -1245,11 +1260,11 @@ restart:
 
 /*!re2c
 re2c:yyfill:check = 0;
-LNUM   [0-9]+
-DNUM   ([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)
+LNUM   [0-9]+(_[0-9]+)*
+DNUM   ({LNUM}?"."{LNUM})|({LNUM}"."{LNUM}?)
 EXPONENT_DNUM  (({LNUM}|{DNUM})[eE][+-]?{LNUM})
-HNUM   "0x"[0-9a-fA-F]+
-BNUM   "0b"[01]+
+HNUM   "0x"[0-9a-fA-F]+(_[0-9a-fA-F]+)*
+BNUM   "0b"[01]+(_[01]+)*
 LABEL  [a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*
 WHITESPACE [ \n\r\t]+
 TABS_AND_SPACES [ \t]*
@@ -1760,45 +1775,70 @@ NEWLINE ("\r"|"\n"|"\r\n")
 }
 
 <ST_IN_SCRIPTING>{BNUM} {
-       char *bin = yytext + 2; /* Skip "0b" */
-       int len = yyleng - 2;
-       char *end;
+       /* Binary literal: the +/- 2 skips "0b"; any '_' separators are removed before conversion */
+       int len = yyleng - 2, contains_underscores;
+       char *end, *bin = yytext + 2;
 
        /* Skip any leading 0s */
-       while (*bin == '0') {
+       while (*bin == '0' || *bin == '_') { /* '_' inside the leading run of zeros is stepped over too */
                ++bin;
                --len;
        }
 
+       contains_underscores = (memchr(bin, '_', len) != NULL); /* searches only the len bytes left after the leading run */
+
+       if (contains_underscores) {
+               bin = estrndup(bin, len); /* bin now refers to the estrndup() result, which is passed to efree() before both returns below */
+               strip_underscores(bin, &len); /* removes the '_' characters and lowers len to match */
+       }
+
        if (len < SIZEOF_ZEND_LONG * 8) {
                if (len == 0) {
                        ZVAL_LONG(zendlval, 0);
                } else {
                        errno = 0;
                        ZVAL_LONG(zendlval, ZEND_STRTOL(bin, &end, 2));
-                       ZEND_ASSERT(!errno && end == yytext + yyleng);
+                       ZEND_ASSERT(!errno && end == bin + len); /* measured against bin/len, which may no longer alias yytext */
+               }
+               if (contains_underscores) {
+                       efree(bin); /* release the stripped copy on the integer path */
                }
                RETURN_TOKEN_WITH_VAL(T_LNUMBER);
        } else {
                ZVAL_DOUBLE(zendlval, zend_bin_strtod(bin, (const char **)&end));
                /* errno isn't checked since we allow HUGE_VAL/INF overflow */
-               ZEND_ASSERT(end == yytext + yyleng);
+               ZEND_ASSERT(end == bin + len); /* measured against bin/len, which may no longer alias yytext */
+               if (contains_underscores) {
+                       efree(bin); /* release the stripped copy on the double path */
+               }
                RETURN_TOKEN_WITH_VAL(T_DNUMBER);
        }
 }
 
 <ST_IN_SCRIPTING>{LNUM} {
-       char *end;
-       if (yyleng < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
+       int len = yyleng, contains_underscores;
+       char *end, *lnum = yytext;
+
+       contains_underscores = (memchr(lnum, '_', len) != NULL);
+
+       if (contains_underscores) {
+               lnum = estrndup(lnum, len);
+               strip_underscores(lnum, &len);
+       }
+
+       if (len < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
                errno = 0;
                /* base must be passed explicitly for correct parse error on Windows */
-               ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, yytext[0] == '0' ? 8 : 10));
+               ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, lnum[0] == '0' ? 8 : 10));
                /* This isn't an assert, we need to ensure 019 isn't valid octal
                 * Because the lexing itself doesn't do that for us
                 */
-               if (end != yytext + yyleng) {
+               if (end != lnum + len) {
                        zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
                        ZVAL_UNDEF(zendlval);
+                       if (contains_underscores) {
+                               efree(lnum);
+                       }
                        if (PARSER_MODE()) {
                                RETURN_TOKEN(T_ERROR);
                        }
@@ -1806,29 +1846,38 @@ NEWLINE ("\r"|"\n"|"\r\n")
                }
        } else {
                errno = 0;
-               ZVAL_LONG(zendlval, ZEND_STRTOL(yytext, &end, 0));
+               ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, 0));
                if (errno == ERANGE) { /* Overflow */
                        errno = 0;
-                       if (yytext[0] == '0') { /* octal overflow */
-                               ZVAL_DOUBLE(zendlval, zend_oct_strtod(yytext, (const char **)&end));
+                       if (lnum[0] == '0') { /* octal overflow */
+                               ZVAL_DOUBLE(zendlval, zend_oct_strtod(lnum, (const char **)&end));
                        } else {
-                               ZVAL_DOUBLE(zendlval, zend_strtod(yytext, (const char **)&end));
+                               ZVAL_DOUBLE(zendlval, zend_strtod(lnum, (const char **)&end));
                        }
                        /* Also not an assert for the same reason */
-                       if (end != yytext + yyleng) {
+                       if (end != lnum + len) {
                                zend_throw_exception(zend_ce_parse_error,
                                        "Invalid numeric literal", 0);
                                ZVAL_UNDEF(zendlval);
+                               if (contains_underscores) {
+                                       efree(lnum);
+                               }
                                if (PARSER_MODE()) {
                                        RETURN_TOKEN(T_ERROR);
                                }
                        }
+                       if (contains_underscores) {
+                               efree(lnum);
+                       }
                        RETURN_TOKEN_WITH_VAL(T_DNUMBER);
                }
                /* Also not an assert for the same reason */
-               if (end != yytext + yyleng) {
+               if (end != lnum + len) {
                        zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
                        ZVAL_UNDEF(zendlval);
+                       if (contains_underscores) {
+                               efree(lnum);
+                       }
                        if (PARSER_MODE()) {
                                RETURN_TOKEN(T_ERROR);
                        }
@@ -1836,18 +1885,28 @@ NEWLINE ("\r"|"\n"|"\r\n")
                }
        }
        ZEND_ASSERT(!errno);
+       if (contains_underscores) {
+               efree(lnum);
+       }
        RETURN_TOKEN_WITH_VAL(T_LNUMBER);
 }
 
 <ST_IN_SCRIPTING>{HNUM} {
-       char *hex = yytext + 2; /* Skip "0x" */
-       int len = yyleng - 2;
-       char *end;
+       /* The +/- 2 skips "0x" */
+       int len = yyleng - 2, contains_underscores;
+       char *end, *hex = yytext + 2;
 
        /* Skip any leading 0s */
-       while (*hex == '0') {
-               hex++;
-               len--;
+       while (*hex == '0' || *hex == '_') {
+               ++hex;
+               --len;
+       }
+
+       contains_underscores = (memchr(hex, '_', len) != NULL);
+
+       if (contains_underscores) {
+               hex = estrndup(hex, len);
+               strip_underscores(hex, &len);
        }
 
        if (len < SIZEOF_ZEND_LONG * 2 || (len == SIZEOF_ZEND_LONG * 2 && *hex <= '7')) {
@@ -1858,11 +1917,17 @@ NEWLINE ("\r"|"\n"|"\r\n")
                        ZVAL_LONG(zendlval, ZEND_STRTOL(hex, &end, 16));
                        ZEND_ASSERT(!errno && end == hex + len);
                }
+               if (contains_underscores) {
+                       efree(hex);
+               }
                RETURN_TOKEN_WITH_VAL(T_LNUMBER);
        } else {
                ZVAL_DOUBLE(zendlval, zend_hex_strtod(hex, (const char **)&end));
                /* errno isn't checked since we allow HUGE_VAL/INF overflow */
                ZEND_ASSERT(end == hex + len);
+               if (contains_underscores) {
+                       efree(hex);
+               }
                RETURN_TOKEN_WITH_VAL(T_DNUMBER);
        }
 }
@@ -1894,10 +1959,22 @@ string:
 
 <ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
        const char *end;
+       int len = yyleng, contains_underscores; /* float literal: len is reduced if '_' separators are stripped */
+       char *dnum = yytext; /* dnum is the text that is actually converted */
 
-       ZVAL_DOUBLE(zendlval, zend_strtod(yytext, &end));
+       contains_underscores = (memchr(dnum, '_', len) != NULL);
+
+       if (contains_underscores) {
+               dnum = estrndup(dnum, len); /* dnum now refers to the estrndup() result, passed to efree() before the return below */
+               strip_underscores(dnum, &len); /* removes the '_' characters and lowers len to match */
+       }
+
+       ZVAL_DOUBLE(zendlval, zend_strtod(dnum, &end));
        /* errno isn't checked since we allow HUGE_VAL/INF overflow */
-       ZEND_ASSERT(end == yytext + yyleng);
+       ZEND_ASSERT(end == dnum + len); /* measured against dnum/len, which may no longer alias yytext */
+       if (contains_underscores) {
+               efree(dnum); /* release the stripped copy */
+       }
        RETURN_TOKEN_WITH_VAL(T_DNUMBER);
 }