granicus.if.org Git - php/commitdiff
Fix double-free on invalid large octal with separators
author Nikita Popov <nikita.ppv@gmail.com>
Sat, 14 Sep 2019 10:08:20 +0000 (12:08 +0200)
committer Nikita Popov <nikita.ppv@gmail.com>
Sat, 14 Sep 2019 10:10:06 +0000 (12:10 +0200)
To clean up the mess here a bit, check for invalid octal digits
with an explicit loop instead of mixing this into the
string-to-number conversion.

Also clean up some type usage.

Zend/zend_language_scanner.l
ext/tokenizer/tests/invalid_large_octal_with_underscores.phpt [new file with mode: 0644]

index 84ed669dfb12ec605f8975325e3e7cd30c56965b..86b8bb1a7047a1c1dc1c0f6266e124546cded854 100644 (file)
@@ -120,7 +120,7 @@ do {                                                                                                                                                        \
 
 BEGIN_EXTERN_C()
 
-static void strip_underscores(char *str, int *len)
+static void strip_underscores(char *str, size_t *len)
 {
        char *src = str, *dest = str;
        while (*src != '\0') {
@@ -1771,8 +1771,9 @@ NEWLINE ("\r"|"\n"|"\r\n")
 
 <ST_IN_SCRIPTING>{BNUM} {
        /* The +/- 2 skips "0b" */
-       int len = yyleng - 2, contains_underscores;
+       size_t len = yyleng - 2;
        char *end, *bin = yytext + 2;
+       zend_bool contains_underscores;
 
        /* Skip any leading 0s */
        while (len > 0 && (*bin == '0' || *bin == '_')) {
@@ -1811,10 +1812,25 @@ NEWLINE ("\r"|"\n"|"\r\n")
 }
 
 <ST_IN_SCRIPTING>{LNUM} {
-       int len = yyleng, contains_underscores;
+       size_t len = yyleng;
        char *end, *lnum = yytext;
-
-       contains_underscores = (memchr(lnum, '_', len) != NULL);
+       zend_bool is_octal = lnum[0] == '0';
+       zend_bool contains_underscores = (memchr(lnum, '_', len) != NULL);
+
+       /* Digits 8 and 9 are illegal in octal literals. */
+       if (is_octal) {
+               size_t i;
+               for (i = 0; i < len; i++) {
+                       if (lnum[i] == '8' || lnum[i] == '9') {
+                               zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
+                               ZVAL_UNDEF(zendlval);
+                               if (PARSER_MODE()) {
+                                       RETURN_TOKEN(T_ERROR);
+                               }
+                               RETURN_TOKEN_WITH_VAL(T_LNUMBER);
+                       }
+               }
+       }
 
        if (contains_underscores) {
                lnum = estrndup(lnum, len);
@@ -1824,21 +1840,8 @@ NEWLINE ("\r"|"\n"|"\r\n")
        if (len < MAX_LENGTH_OF_LONG - 1) { /* Won't overflow */
                errno = 0;
                /* base must be passed explicitly for correct parse error on Windows */
-               ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, lnum[0] == '0' ? 8 : 10));
-               /* This isn't an assert, we need to ensure 019 isn't valid octal
-                * Because the lexing itself doesn't do that for us
-                */
-               if (end != lnum + len) {
-                       zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
-                       ZVAL_UNDEF(zendlval);
-                       if (contains_underscores) {
-                               efree(lnum);
-                       }
-                       if (PARSER_MODE()) {
-                               RETURN_TOKEN(T_ERROR);
-                       }
-                       RETURN_TOKEN_WITH_VAL(T_LNUMBER);
-               }
+               ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, is_octal ? 8 : 10));
+               ZEND_ASSERT(end == lnum + len);
        } else {
                errno = 0;
                ZVAL_LONG(zendlval, ZEND_STRTOL(lnum, &end, 0));
@@ -1849,35 +1852,13 @@ NEWLINE ("\r"|"\n"|"\r\n")
                        } else {
                                ZVAL_DOUBLE(zendlval, zend_strtod(lnum, (const char **)&end));
                        }
-                       /* Also not an assert for the same reason */
-                       if (end != lnum + len) {
-                               zend_throw_exception(zend_ce_parse_error,
-                                       "Invalid numeric literal", 0);
-                               ZVAL_UNDEF(zendlval);
-                               if (contains_underscores) {
-                                       efree(lnum);
-                               }
-                               if (PARSER_MODE()) {
-                                       RETURN_TOKEN(T_ERROR);
-                               }
-                       }
+                       ZEND_ASSERT(end == lnum + len);
                        if (contains_underscores) {
                                efree(lnum);
                        }
                        RETURN_TOKEN_WITH_VAL(T_DNUMBER);
                }
-               /* Also not an assert for the same reason */
-               if (end != lnum + len) {
-                       zend_throw_exception(zend_ce_parse_error, "Invalid numeric literal", 0);
-                       ZVAL_UNDEF(zendlval);
-                       if (contains_underscores) {
-                               efree(lnum);
-                       }
-                       if (PARSER_MODE()) {
-                               RETURN_TOKEN(T_ERROR);
-                       }
-                       RETURN_TOKEN_WITH_VAL(T_DNUMBER);
-               }
+               ZEND_ASSERT(end == lnum + len);
        }
        ZEND_ASSERT(!errno);
        if (contains_underscores) {
@@ -1888,8 +1869,9 @@ NEWLINE ("\r"|"\n"|"\r\n")
 
 <ST_IN_SCRIPTING>{HNUM} {
        /* The +/- 2 skips "0x" */
-       int len = yyleng - 2, contains_underscores;
+       size_t len = yyleng - 2;
        char *end, *hex = yytext + 2;
+       zend_bool contains_underscores;
 
        /* Skip any leading 0s */
        while (len > 0 && (*hex == '0' || *hex == '_')) {
@@ -1954,10 +1936,9 @@ string:
 
 <ST_IN_SCRIPTING>{DNUM}|{EXPONENT_DNUM} {
        const char *end;
-       int len = yyleng, contains_underscores;
+       size_t len = yyleng;
        char *dnum = yytext;
-
-       contains_underscores = (memchr(dnum, '_', len) != NULL);
+       zend_bool contains_underscores = (memchr(dnum, '_', len) != NULL);
 
        if (contains_underscores) {
                dnum = estrndup(dnum, len);
diff --git a/ext/tokenizer/tests/invalid_large_octal_with_underscores.phpt b/ext/tokenizer/tests/invalid_large_octal_with_underscores.phpt
new file mode 100644 (file)
index 0000000..b6d43c1
--- /dev/null
@@ -0,0 +1,31 @@
+--TEST--
+Large invalid octal number with underscores
+--FILE--
+<?php
+
+var_dump(token_get_all("<?php 0_10000000000000000000009;"));
+
+?>
+--EXPECTF--
+array(3) {
+  [0]=>
+  array(3) {
+    [0]=>
+    int(%d)
+    [1]=>
+    string(6) "<?php "
+    [2]=>
+    int(1)
+  }
+  [1]=>
+  array(3) {
+    [0]=>
+    int(%d)
+    [1]=>
+    string(25) "0_10000000000000000000009"
+    [2]=>
+    int(1)
+  }
+  [2]=>
+  string(1) ";"
+}