From: Nikita Popov <nikita.ppv@gmail.com> Date: Tue, 22 Sep 2020 15:01:06 +0000 (+0200) Subject: Handle out-of-bounds offset consistently in grapheme_* API X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=6436ddbfc8e79cfcc424741f4321e5b533c27a4f;p=php Handle out-of-bounds offset consistently in grapheme_* API Make sure we throw the same kind of error regardless of whether the offset is out-of-bounds in the fast path or in the slow path. --- diff --git a/ext/intl/grapheme/grapheme_string.c b/ext/intl/grapheme/grapheme_string.c index e987c176e1..c5d2c6585c 100644 --- a/ext/intl/grapheme/grapheme_string.c +++ b/ext/intl/grapheme/grapheme_string.c @@ -129,21 +129,17 @@ PHP_FUNCTION(grapheme_strpos) RETURN_THROWS(); } - if (offset >= 0) { + if (offset >= 0 && grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0) { /* quick check to see if the string might be there * I realize that 'offset' is 'grapheme count offset' but will work in spite of that */ found = php_memnstr(haystack + noffset, needle, needle_len, haystack + haystack_len); /* if it isn't there the we are done */ - if (!found) { - RETURN_FALSE; - } - - /* if it is there, and if the haystack is ascii, we are all done */ - if ( grapheme_ascii_check((unsigned char *)haystack, haystack_len) >= 0 ) { + if (found) { RETURN_LONG(found - haystack); } + RETURN_FALSE; } /* do utf16 part of the strpos */ @@ -586,9 +582,7 @@ static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_cas if ( !f_ignore_case ) { - /* ASCII optimization: quick check to see if the string might be there - * I realize that 'offset' is 'grapheme count offset' but will work in spite of that - */ + /* ASCII optimization: quick check to see if the string might be there */ found = php_memnstr(haystack, needle, needle_len, haystack + haystack_len); /* if it isn't there the we are done */ diff --git a/ext/intl/grapheme/grapheme_util.c b/ext/intl/grapheme/grapheme_util.c index 71d6074999..44592edaaa 100644 --- a/ext/intl/grapheme/grapheme_util.c +++ b/ext/intl/grapheme/grapheme_util.c @@ -107,23 +107,12 @@ void grapheme_substr_ascii(char *str, size_t str_len, int32_t f, int32_t l, char } /* }}} */ -#define STRPOS_CHECK_STATUS(status, error) \ - if ( U_FAILURE( (status) ) ) { \ - intl_error_set_code( NULL, (status) ); \ - intl_error_set_custom_msg( NULL, (error), 0 ); \ - if (uhaystack) { \ - efree( uhaystack ); \ - } \ - if (uneedle) { \ - efree( uneedle ); \ - } \ - if(bi) { \ - ubrk_close (bi); \ - } \ - if(src) { \ - usearch_close(src); \ - } \ - return -1; \ +#define STRPOS_CHECK_STATUS(status, error) \ + if ( U_FAILURE( (status) ) ) { \ + intl_error_set_code( NULL, (status) ); \ + intl_error_set_custom_msg( NULL, (error), 0 ); \ + ret_pos = -1; \ + goto finish; \ } @@ -172,9 +161,10 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, if(offset != 0) { offset_pos = grapheme_get_haystack_offset(bi, offset); - if(offset_pos == -1) { - status = U_ILLEGAL_ARGUMENT_ERROR; - STRPOS_CHECK_STATUS(status, "Invalid search offset"); + if (offset_pos == -1) { + zend_argument_value_error(3, "must be contained in argument #1 ($haystack)"); + ret_pos = -1; + goto finish; } status = U_ZERO_ERROR; usearch_setOffset(src, offset_pos, &status); @@ -201,14 +191,19 @@ int32_t grapheme_strpos_utf16(char *haystack, size_t haystack_len, char *needle, ret_pos = -1; } +finish: if (uhaystack) { efree( uhaystack ); } if (uneedle) { efree( uneedle ); } - ubrk_close (bi); - usearch_close (src); + if (bi) { + ubrk_close (bi); + } + if (src) { + usearch_close (src); + } return ret_pos; } diff --git a/ext/intl/tests/grapheme_out_of_bounds.phpt b/ext/intl/tests/grapheme_out_of_bounds.phpt index ab7a575e21..ce74dc6dbc 100644 --- a/ext/intl/tests/grapheme_out_of_bounds.phpt +++ b/ext/intl/tests/grapheme_out_of_bounds.phpt @@ -8,6 +8,10 @@ var_dump(grapheme_strpos("foo", "bar", 3)); var_dump(grapheme_stripos("foo", "bar", 3)); var_dump(grapheme_strrpos("foo", "bar", 3)); var_dump(grapheme_strripos("foo", "bar", 3)); +var_dump(grapheme_strpos("äöü", "bar", 3)); +var_dump(grapheme_stripos("äöü", "bar", 3)); +var_dump(grapheme_strrpos("äöü", "bar", 3)); +var_dump(grapheme_strripos("äöü", "bar", 3)); echo "\n"; // Offset == -Length is legal. @@ -15,6 +19,17 @@ var_dump(grapheme_strpos("foo", "bar", -3)); var_dump(grapheme_stripos("foo", "bar", -3)); var_dump(grapheme_strrpos("foo", "bar", -3)); var_dump(grapheme_strripos("foo", "bar", -3)); +var_dump(grapheme_strpos("äöü", "bar", -3)); +var_dump(grapheme_stripos("äöü", "bar", -3)); +var_dump(grapheme_strrpos("äöü", "bar", -3)); +var_dump(grapheme_strripos("äöü", "bar", -3)); +echo "\n"; + +// Offset == Length is legal. +var_dump(grapheme_strpos("", "bar", 0)); +var_dump(grapheme_stripos("", "bar", 0)); +var_dump(grapheme_strrpos("", "bar", 0)); +var_dump(grapheme_strripos("", "bar", 0)); echo "\n"; // Positive out of bounds. @@ -38,6 +53,26 @@ try { } catch (ValueError $e) { echo $e->getMessage(), "\n"; } +try { + var_dump(grapheme_strpos("äöü", "bar", 4)); +} catch (ValueError $e) { + echo $e->getMessage(), "\n"; +} +try { + var_dump(grapheme_stripos("äöü", "bar", 4)); +} catch (ValueError $e) { + echo $e->getMessage(), "\n"; +} +try { + var_dump(grapheme_strrpos("äöü", "bar", 4)); +} catch (ValueError $e) { + echo $e->getMessage(), "\n"; +} +try { + var_dump(grapheme_strripos("äöü", "bar", 4)); +} catch (ValueError $e) { + echo $e->getMessage(), "\n"; +} echo "\n"; // Negative out of bounds. @@ -61,6 +96,26 @@ try { } catch (ValueError $e) { echo $e->getMessage(), "\n"; } +try { + var_dump(grapheme_strpos("äöü", "bar", -4)); +} catch (ValueError $e) { + echo $e->getMessage(), "\n"; +} +try { + var_dump(grapheme_stripos("äöü", "bar", -4)); +} catch (ValueError $e) { + echo $e->getMessage(), "\n"; +} +try { + var_dump(grapheme_strrpos("äöü", "bar", -4)); +} catch (ValueError $e) { + echo $e->getMessage(), "\n"; +} +try { + var_dump(grapheme_strripos("äöü", "bar", -4)); +} catch (ValueError $e) { + echo $e->getMessage(), "\n"; +} echo "\n"; // TODO: substr is special. @@ -75,17 +130,38 @@ bool(false) bool(false) bool(false) bool(false) +bool(false) +bool(false) +bool(false) +bool(false) + +bool(false) +bool(false) +bool(false) +bool(false) +bool(false) +bool(false) +bool(false) +bool(false) bool(false) bool(false) bool(false) bool(false) +grapheme_strpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack) +grapheme_stripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack) +grapheme_strrpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack) +grapheme_strripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack) grapheme_strpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack) grapheme_stripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack) grapheme_strrpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack) grapheme_strripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack) +grapheme_strpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack) +grapheme_stripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack) +grapheme_strrpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack) +grapheme_strripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack) grapheme_strpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack) grapheme_stripos(): Argument #3 ($offset) must be contained in argument #1 ($haystack) grapheme_strrpos(): Argument #3 ($offset) must be contained in argument #1 ($haystack)