From: Nikita Popov <nikic@php.net>
Date: Fri, 18 Nov 2016 15:41:13 +0000 (+0100)
Subject: Make php_url_parse_ex() respect length argument
X-Git-Tag: php-5.6.29RC1~1
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2d19c92fc2f14aa97db9094eaa0b67d1c3b12409;p=php

Make php_url_parse_ex() respect length argument

This should fix all out-of-bounds reads that could previously
occur if the string passed to php_url_parse_ex() is not
NUL-terminated.
---

diff --git a/ext/standard/url.c b/ext/standard/url.c
index aa1b0312fe..6ecace53e5 100644
--- a/ext/standard/url.c
+++ b/ext/standard/url.c
@@ -104,7 +104,7 @@ PHPAPI php_url *php_url_parse_ex(char const *str, int length)
 	ue = s + length;
 
 	/* parse scheme */
-	if ((e = memchr(s, ':', length)) && (e - s)) {
+	if ((e = memchr(s, ':', length)) && e != s) {
 		/* validate scheme */
 		p = s;
 		while (p < e) {
@@ -119,7 +119,7 @@ PHPAPI php_url *php_url_parse_ex(char const *str, int length)
 			p++;
 		}
 
-		if (*(e + 1) == '\0') { /* only scheme is available */
+		if (e + 1 == ue) { /* only scheme is available */
 			ret->scheme = estrndup(s, (e - s));
 			php_replace_controlchars_ex(ret->scheme, (e - s));
 			return ret;
@@ -134,11 +134,11 @@ PHPAPI php_url *php_url_parse_ex(char const *str, int length)
 			 * correctly parse things like a.com:80
 			 */
 			p = e + 1;
-			while (isdigit(*p)) {
+			while (p < ue && isdigit(*p)) {
 				p++;
 			}
 
-			if ((*p == '\0' || *p == '/') && (p - e) < 7) {
+			if ((p == ue || *p == '/') && (p - e) < 7) {
 				goto parse_port;
 			}
 
@@ -151,14 +151,14 @@ PHPAPI php_url *php_url_parse_ex(char const *str, int length)
 			ret->scheme = estrndup(s, (e-s));
 			php_replace_controlchars_ex(ret->scheme, (e - s));
 
-			if (*(e+2) == '/') {
+			if (e + 2 < ue && *(e + 2) == '/') {
 				s = e + 3;
 				if (!strncasecmp("file", ret->scheme, sizeof("file"))) {
-					if (*(e + 3) == '/') {
+					if (e + 3 < ue && *(e + 3) == '/') {
 						/* support windows drive letters as in:
 						   file:///c:/somedir/file.txt
 						*/
-						if (*(e + 5) == ':') {
+						if (e + 5 < ue && *(e + 5) == ':') {
 							s = e + 4;
 						}
 						goto just_path;
@@ -174,18 +174,18 @@ PHPAPI php_url *php_url_parse_ex(char const *str, int length)
 		p = e + 1;
 		pp = p;
 
-		while (pp-p < 6 && isdigit(*pp)) {
+		while (pp < ue && pp - p < 6 && isdigit(*pp)) {
 			pp++;
 		}
 
-		if (pp - p > 0 && pp - p < 6 && (*pp == '/' || *pp == '\0')) {
+		if (pp - p > 0 && pp - p < 6 && (pp == ue || *pp == '/')) {
 			long port;
 			memcpy(port_buf, p, (pp - p));
 			port_buf[pp - p] = '\0';
 			port = strtol(port_buf, NULL, 10);
 			if (port > 0 && port <= 65535) {
 				ret->port = (unsigned short) port;
-				if (*s == '/' && *(s + 1) == '/') { /* relative-scheme URL */
+				if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */
 				    s += 2;
 				}
 			} else {
@@ -193,22 +193,32 @@ PHPAPI php_url *php_url_parse_ex(char const *str, int length)
 				efree(ret);
 				return NULL;
 			}
-		} else if (p == pp && *pp == '\0') {
+		} else if (p == pp && pp == ue) {
 			STR_FREE(ret->scheme);
 			efree(ret);
 			return NULL;
-		} else if (*s == '/' && *(s + 1) == '/') { /* relative-scheme URL */
+		} else if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */
 			s += 2;
 		} else {
 			goto just_path;
 		}
-	} else if (*s == '/' && *(s + 1) == '/') { /* relative-scheme URL */
+	} else if (s + 1 < ue && *s == '/' && *(s + 1) == '/') { /* relative-scheme URL */
 		s += 2;
 	} else {
 		goto just_path;
 	}
 
-	e = s + strcspn(s, "/?#");
+	/* Binary-safe strcspn(s, "/?#") */
+	e = ue;
+	if ((p = memchr(s, '/', e - s))) {
+		e = p;
+	}
+	if ((p = memchr(s, '?', e - s))) {
+		e = p;
+	}
+	if ((p = memchr(s, '#', e - s))) {
+		e = p;
+	}
 
 	/* check for login and password */
 	if ((p = zend_memrchr(s, '@', (e-s)))) {
@@ -228,18 +238,16 @@ PHPAPI php_url *php_url_parse_ex(char const *str, int length)
 	}
 
 	/* check for port */
-	if (*s == '[' && *(e-1) == ']') {
+	if (s < ue && *s == '[' && *(e-1) == ']') {
 		/* Short circuit portscan,
 		   we're dealing with an
 		   IPv6 embedded address */
-		p = s;
+		p = NULL;
 	} else {
-		/* memrchr is a GNU specific extension
-		   Emulate for wide compatibility */
-		for(p = e; p >= s && *p != ':'; p--);
+		p = zend_memrchr(s, ':', (e-s));
 	}
 
-	if (p >= s && *p == ':') {
+	if (p) {
 		if (!ret->port) {
 			p++;
 			if (e-p > 5) { /* port cannot be longer then 5 characters */