From: Nikita Popov <nikita.ppv@gmail.com>
Date: Tue, 22 Sep 2020 15:01:06 +0000 (+0200)
Subject: Handle out-of-bounds offset consistently in grapheme_* API
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=6436ddbfc8e79cfcc424741f4321e5b533c27a4f;p=php

Handle out-of-bounds offset consistently in grapheme_* API

Make sure we throw the same kind of error regardless of whether
the offset is out-of-bounds in the fast path or in the slow
path.
---

diff --git a/ext/intl/grapheme/grapheme_string.c b/ext/intl/grapheme/grapheme_string.c
index e987c176e1..c5d2c6585c 100644
--- a/ext/intl/grapheme/grapheme_string.c
+++ b/ext/intl/grapheme/grapheme_string.c
@@ -129,21 +129,17 @@ PHP_FUNCTION(grapheme_strpos)
 		RETURN_THROWS();
 	}
 
-	if (offset >= 0) {
+	if (offset >= 0 && grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0) {
 		/* quick check to see if the string might be there
 		 * I realize that 'offset' is 'grapheme count offset' but will work in spite of that
 		*/
 		found = php_memnstr(haystack + noffset, needle, needle_len, haystack + haystack_len);
 
 		/* if it isn't there the we are done */
-		if (!found) {
-			RETURN_FALSE;
-		}
-
-		/* if it is there, and if the haystack is ascii, we are all done */
-		if ( grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0 ) {
+		if (found) {
 			RETURN_LONG(found - haystack);
 		}
+		RETURN_FALSE;
 	}
 
 	/* do utf16 part of the strpos */
@@ -586,9 +582,7 @@ static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_cas
 
 	if ( !f_ignore_case ) {
 
-		/* ASCII optimization: quick check to see if the string might be there
-		 * I realize that 'offset' is 'grapheme count offset' but will work in spite of that
-		*/
+		/* ASCII optimization: quick check to see if the string might be there */
 		found = php_memnstr(haystack, needle, needle_len, haystack + haystack_len);
 
 		/* if it isn't there the we are done */
diff --git a/ext/intl/grapheme/grapheme_util.c b/ext/intl/grapheme/grapheme_util.c
index 71d6074999..44592edaaa 100644
--- a/ext/intl/grapheme/grapheme_util.c
+++ b/ext/intl/grapheme/grapheme_util.c
@@ -107,23 +107,12 @@ void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char
 }
 /* }}} */
 
-#define STRPOS_CHECK_STATUS(status, error) 							\
-	if ( U_FAILURE( (status) ) ) { 									\
-		intl_error_set_code( NULL, (status) ); 			\
-		intl_error_set_custom_msg( NULL, (error), 0 ); 	\
-		if (uhaystack) { 											\
-			efree( uhaystack ); 									\
-		} 															\
-		if (uneedle) { 												\
-			efree( uneedle ); 										\
-		} 															\
-		if(bi) { 													\
-			ubrk_close (bi); 										\
-		} 															\
-		if(src) {													\
-			usearch_close(src);										\
-		}															\
-		return -1; 													\
+#define STRPOS_CHECK_STATUS(status, error) \
+	if ( U_FAILURE( (status) ) ) { \
+		intl_error_set_code( NULL, (status) ); \
+		intl_error_set_custom_msg( NULL, (error), 0 ); \
+		ret_pos = -1; \
+		goto finish; \
 	}
 
 
@@ -172,9 +161,10 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle,
 
 	if(offset != 0) {
 		offset_pos = grapheme_get_haystack_offset(bi, offset);
-		if(offset_pos == -1) {
-			status = U_ILLEGAL_ARGUMENT_ERROR;
-			STRPOS_CHECK_STATUS(status, "Invalid search offset");
+		if (offset_pos == -1) {
+			zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
+			ret_pos = -1;
+			goto finish;
 		}
 		status = U_ZERO_ERROR;
 		usearch_setOffset(src, offset_pos, &status);
@@ -201,14 +191,19 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle,
 		ret_pos = -1;
 	}
 
+finish:
 	if (uhaystack) {
 		efree( uhaystack );
 	}
 	if (uneedle) {
 		efree( uneedle );
 	}
-	ubrk_close (bi);
-	usearch_close (src);
+	if (bi) {
+		ubrk_close (bi);
+	}
+	if (src) {
+		usearch_close (src);
+	}
 
 	return ret_pos;
 }
diff --git a/ext/intl/tests/grapheme_out_of_bounds.phpt b/ext/intl/tests/grapheme_out_of_bounds.phpt
index ab7a575e21..ce74dc6dbc 100644
--- a/ext/intl/tests/grapheme_out_of_bounds.phpt
+++ b/ext/intl/tests/grapheme_out_of_bounds.phpt
@@ -8,6 +8,10 @@ var_dump(grapheme_strpos("foo", "bar", 3));
 var_dump(grapheme_stripos("foo", "bar", 3));
 var_dump(grapheme_strrpos("foo", "bar", 3));
 var_dump(grapheme_strripos("foo", "bar", 3));
+var_dump(grapheme_strpos("äöü", "bar", 3));
+var_dump(grapheme_stripos("äöü", "bar", 3));
+var_dump(grapheme_strrpos("äöü", "bar", 3));
+var_dump(grapheme_strripos("äöü", "bar", 3));
 echo "\n";
 
 // Offset == -Length is legal.
@@ -15,6 +19,17 @@ var_dump(grapheme_strpos("foo", "bar", -3));
 var_dump(grapheme_stripos("foo", "bar", -3));
 var_dump(grapheme_strrpos("foo", "bar", -3));
 var_dump(grapheme_strripos("foo", "bar", -3));
+var_dump(grapheme_strpos("äöü", "bar", -3));
+var_dump(grapheme_stripos("äöü", "bar", -3));
+var_dump(grapheme_strrpos("äöü", "bar", -3));
+var_dump(grapheme_strripos("äöü", "bar", -3));
+echo "\n";
+
+// Offset == Length is legal.
+var_dump(grapheme_strpos("", "bar", 0));
+var_dump(grapheme_stripos("", "bar", 0));
+var_dump(grapheme_strrpos("", "bar", 0));
+var_dump(grapheme_strripos("", "bar", 0));
 echo "\n";
 
 // Positive out of bounds.
@@ -38,6 +53,26 @@ try {
 } catch (ValueError $e) {
     echo $e->getMessage(), "\n";
 }
+try {
+    var_dump(grapheme_strpos("äöü", "bar", 4));
+} catch (ValueError $e) {
+    echo $e->getMessage(), "\n";
+}
+try {
+    var_dump(grapheme_stripos("äöü", "bar", 4));
+} catch (ValueError $e) {
+    echo $e->getMessage(), "\n";
+}
+try {
+    var_dump(grapheme_strrpos("äöü", "bar", 4));
+} catch (ValueError $e) {
+    echo $e->getMessage(), "\n";
+}
+try {
+    var_dump(grapheme_strripos("äöü", "bar", 4));
+} catch (ValueError $e) {
+    echo $e->getMessage(), "\n";
+}
 echo "\n";
 
 // Negative out of bounds.
@@ -61,6 +96,26 @@ try {
 } catch (ValueError $e) {
     echo $e->getMessage(), "\n";
 }
+try {
+    var_dump(grapheme_strpos("äöü", "bar", -4));
+} catch (ValueError $e) {
+    echo $e->getMessage(), "\n";
+}
+try {
+    var_dump(grapheme_stripos("äöü", "bar", -4));
+} catch (ValueError $e) {
+    echo $e->getMessage(), "\n";
+}
+try {
+    var_dump(grapheme_strrpos("äöü", "bar", -4));
+} catch (ValueError $e) {
+    echo $e->getMessage(), "\n";
+}
+try {
+    var_dump(grapheme_strripos("äöü", "bar", -4));
+} catch (ValueError $e) {
+    echo $e->getMessage(), "\n";
+}
 echo "\n";
 
 // TODO: substr is special.
@@ -75,17 +130,38 @@ bool(false)
 bool(false)
 bool(false)
 bool(false)
+bool(false)
+bool(false)
+bool(false)
+bool(false)
+
+bool(false)
+bool(false)
+bool(false)
+bool(false)
+bool(false)
+bool(false)
+bool(false)
+bool(false)
 
 bool(false)
 bool(false)
 bool(false)
 bool(false)
 
+grapheme_strpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
+grapheme_stripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
+grapheme_strrpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
+grapheme_strripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
 grapheme_strpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
 grapheme_stripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
 grapheme_strrpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
 grapheme_strripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
 
+grapheme_strpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
+grapheme_stripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
+grapheme_strrpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
+grapheme_strripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
 grapheme_strpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
 grapheme_stripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
 grapheme_strrpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)