]> granicus.if.org Git - php/commitdiff
Handle out-of-bounds offset consistently in grapheme_* API
authorNikita Popov <nikita.ppv@gmail.com>
Tue, 22 Sep 2020 15:01:06 +0000 (17:01 +0200)
committerNikita Popov <nikita.ppv@gmail.com>
Tue, 22 Sep 2020 15:01:06 +0000 (17:01 +0200)
Make sure we throw the same kind of error regardless of whether
the offset is out-of-bounds in the fast path or in the slow
path.

ext/intl/grapheme/grapheme_string.c
ext/intl/grapheme/grapheme_util.c
ext/intl/tests/grapheme_out_of_bounds.phpt

index e987c176e1a79875f3f0e0b16c19fca1a34e35e8..c5d2c6585c679a21b4089225ffb53d17f2fc17ac 100644 (file)
@@ -129,21 +129,17 @@ PHP_FUNCTION(grapheme_strpos)
                RETURN_THROWS();
        }
 
-       if (offset >= 0) {
+       if (offset >= 0 && grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0) {
                /* quick check to see if the string might be there
                 * I realize that 'offset' is 'grapheme count offset' but will work in spite of that
                */
                found = php_memnstr(haystack + noffset, needle, needle_len, haystack + haystack_len);
 
                /* if it isn't there the we are done */
-               if (!found) {
-                       RETURN_FALSE;
-               }
-
-               /* if it is there, and if the haystack is ascii, we are all done */
-               if ( grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0 ) {
+               if (found) {
                        RETURN_LONG(found - haystack);
                }
+               RETURN_FALSE;
        }
 
        /* do utf16 part of the strpos */
@@ -586,9 +582,7 @@ static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_cas
 
        if ( !f_ignore_case ) {
 
-               /* ASCII optimization: quick check to see if the string might be there
-                * I realize that 'offset' is 'grapheme count offset' but will work in spite of that
-               */
+               /* ASCII optimization: quick check to see if the string might be there */
                found = php_memnstr(haystack, needle, needle_len, haystack + haystack_len);
 
                /* if it isn't there the we are done */
index 71d60749990c03e385640d12a71b423297b3a434..44592edaaa92740d9bef43094494313fa5975f4f 100644 (file)
@@ -107,23 +107,12 @@ void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char
 }
 /* }}} */
 
-#define STRPOS_CHECK_STATUS(status, error)                                                     \
-       if ( U_FAILURE( (status) ) ) {                                                                  \
-               intl_error_set_code( NULL, (status) );                  \
-               intl_error_set_custom_msg( NULL, (error), 0 );  \
-               if (uhaystack) {                                                                                        \
-                       efree( uhaystack );                                                                     \
-               }                                                                                                                       \
-               if (uneedle) {                                                                                          \
-                       efree( uneedle );                                                                               \
-               }                                                                                                                       \
-               if(bi) {                                                                                                        \
-                       ubrk_close (bi);                                                                                \
-               }                                                                                                                       \
-               if(src) {                                                                                                       \
-                       usearch_close(src);                                                                             \
-               }                                                                                                                       \
-               return -1;                                                                                                      \
+#define STRPOS_CHECK_STATUS(status, error) \
+       if ( U_FAILURE( (status) ) ) { \
+               intl_error_set_code( NULL, (status) ); \
+               intl_error_set_custom_msg( NULL, (error), 0 ); \
+               ret_pos = -1; \
+               goto finish; \
        }
 
 
@@ -172,9 +161,10 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle,
 
        if(offset != 0) {
                offset_pos = grapheme_get_haystack_offset(bi, offset);
-               if(offset_pos == -1) {
-                       status = U_ILLEGAL_ARGUMENT_ERROR;
-                       STRPOS_CHECK_STATUS(status, "Invalid search offset");
+               if (offset_pos == -1) {
+                       zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
+                       ret_pos = -1;
+                       goto finish;
                }
                status = U_ZERO_ERROR;
                usearch_setOffset(src, offset_pos, &status);
@@ -201,14 +191,19 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle,
                ret_pos = -1;
        }
 
+finish:
        if (uhaystack) {
                efree( uhaystack );
        }
        if (uneedle) {
                efree( uneedle );
        }
-       ubrk_close (bi);
-       usearch_close (src);
+       if (bi) {
+               ubrk_close (bi);
+       }
+       if (src) {
+               usearch_close (src);
+       }
 
        return ret_pos;
 }
index ab7a575e21bcd5fbf9db51cac4523101c5f65111..ce74dc6dbc69f2c5f50477283f53e2de7ca96321 100644 (file)
@@ -8,6 +8,10 @@ var_dump(grapheme_strpos("foo", "bar", 3));
 var_dump(grapheme_stripos("foo", "bar", 3));
 var_dump(grapheme_strrpos("foo", "bar", 3));
 var_dump(grapheme_strripos("foo", "bar", 3));
+var_dump(grapheme_strpos("äöü", "bar", 3));
+var_dump(grapheme_stripos("äöü", "bar", 3));
+var_dump(grapheme_strrpos("äöü", "bar", 3));
+var_dump(grapheme_strripos("äöü", "bar", 3));
 echo "\n";
 
 // Offset == -Length is legal.
@@ -15,6 +19,17 @@ var_dump(grapheme_strpos("foo", "bar", -3));
 var_dump(grapheme_stripos("foo", "bar", -3));
 var_dump(grapheme_strrpos("foo", "bar", -3));
 var_dump(grapheme_strripos("foo", "bar", -3));
+var_dump(grapheme_strpos("äöü", "bar", -3));
+var_dump(grapheme_stripos("äöü", "bar", -3));
+var_dump(grapheme_strrpos("äöü", "bar", -3));
+var_dump(grapheme_strripos("äöü", "bar", -3));
+echo "\n";
+
+// Offset == Length is legal.
+var_dump(grapheme_strpos("", "bar", 0));
+var_dump(grapheme_stripos("", "bar", 0));
+var_dump(grapheme_strrpos("", "bar", 0));
+var_dump(grapheme_strripos("", "bar", 0));
 echo "\n";
 
 // Positive out of bounds.
@@ -38,6 +53,26 @@ try {
 } catch (ValueError $e) {
     echo $e->getMessage(), "\n";
 }
+try {
+    var_dump(grapheme_strpos("äöü", "bar", 4));
+} catch (ValueError $e) {
+    echo $e->getMessage(), "\n";
+}
+try {
+    var_dump(grapheme_stripos("äöü", "bar", 4));
+} catch (ValueError $e) {
+    echo $e->getMessage(), "\n";
+}
+try {
+    var_dump(grapheme_strrpos("äöü", "bar", 4));
+} catch (ValueError $e) {
+    echo $e->getMessage(), "\n";
+}
+try {
+    var_dump(grapheme_strripos("äöü", "bar", 4));
+} catch (ValueError $e) {
+    echo $e->getMessage(), "\n";
+}
 echo "\n";
 
 // Negative out of bounds.
@@ -61,6 +96,26 @@ try {
 } catch (ValueError $e) {
     echo $e->getMessage(), "\n";
 }
+try {
+    var_dump(grapheme_strpos("äöü", "bar", -4));
+} catch (ValueError $e) {
+    echo $e->getMessage(), "\n";
+}
+try {
+    var_dump(grapheme_stripos("äöü", "bar", -4));
+} catch (ValueError $e) {
+    echo $e->getMessage(), "\n";
+}
+try {
+    var_dump(grapheme_strrpos("äöü", "bar", -4));
+} catch (ValueError $e) {
+    echo $e->getMessage(), "\n";
+}
+try {
+    var_dump(grapheme_strripos("äöü", "bar", -4));
+} catch (ValueError $e) {
+    echo $e->getMessage(), "\n";
+}
 echo "\n";
 
 // TODO: substr is special.
@@ -75,17 +130,38 @@ bool(false)
 bool(false)
 bool(false)
 bool(false)
+bool(false)
+bool(false)
+bool(false)
+bool(false)
+
+bool(false)
+bool(false)
+bool(false)
+bool(false)
+bool(false)
+bool(false)
+bool(false)
+bool(false)
 
 bool(false)
 bool(false)
 bool(false)
 bool(false)
 
+grapheme_strpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
+grapheme_stripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
+grapheme_strrpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
+grapheme_strripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
 grapheme_strpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
 grapheme_stripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
 grapheme_strrpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
 grapheme_strripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
 
+grapheme_strpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
+grapheme_stripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
+grapheme_strrpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
+grapheme_strripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
 grapheme_strpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
 grapheme_stripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)
 grapheme_strrpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)