granicus.if.org Git - php/commitdiff
fix bug #61860: use USearch for searches, it does the right thing
author Stanislav Malyshev <stas@php.net>
Mon, 24 Jun 2013 06:24:12 +0000 (23:24 -0700)
committer Stanislav Malyshev <stas@php.net>
Mon, 24 Jun 2013 06:24:12 +0000 (23:24 -0700)
ext/intl/grapheme/grapheme_string.c
ext/intl/grapheme/grapheme_util.c
ext/intl/grapheme/grapheme_util.h
ext/intl/tests/bug61860.phpt [new file with mode: 0644]

index 1b7327e0012f74ab18501e1ba8233e92a9a6039f..8a094e015e43330b53d566af8a35811986951f21 100644 (file)
@@ -113,7 +113,7 @@ PHP_FUNCTION(grapheme_strpos)
        unsigned char *found;
        long loffset = 0;
        int32_t offset = 0;
-       int ret_pos, uchar_pos;
+       int ret_pos;
 
        if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &loffset) == FAILURE) {
 
@@ -160,10 +160,10 @@ PHP_FUNCTION(grapheme_strpos)
        }
 
        /* do utf16 part of the strpos */
-       ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, &uchar_pos, 0 /* fIgnoreCase */ TSRMLS_CC );
+       ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* fIgnoreCase */, 0 /* last */ TSRMLS_CC );
 
        if ( ret_pos >= 0 ) {
-               RETURN_LONG(ret_pos + offset);
+               RETURN_LONG(ret_pos);
        } else {
                RETURN_FALSE;
        }
@@ -180,7 +180,7 @@ PHP_FUNCTION(grapheme_stripos)
        unsigned char *found;
        long loffset = 0;
        int32_t offset = 0;
-       int ret_pos, uchar_pos;
+       int ret_pos;
        int is_ascii;
 
        if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", (char **)&haystack, &haystack_len, (char **)&needle, &needle_len, &loffset) == FAILURE) {
@@ -235,10 +235,10 @@ PHP_FUNCTION(grapheme_stripos)
        }
 
        /* do utf16 part of the strpos */
-       ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, &uchar_pos, 1 /* fIgnoreCase */ TSRMLS_CC );
+       ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 1 /* fIgnoreCase */, 0 /*last */ TSRMLS_CC );
 
        if ( ret_pos >= 0 ) {
-               RETURN_LONG(ret_pos + offset);
+               RETURN_LONG(ret_pos);
        } else {
                RETURN_FALSE;
        }
@@ -304,7 +304,7 @@ PHP_FUNCTION(grapheme_strrpos)
                /* else we need to continue via utf16 */
        }
 
-       ret_pos = grapheme_strrpos_utf16(haystack, haystack_len, needle, needle_len, offset, 0 /* f_ignore_case */ TSRMLS_CC);
+       ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL, 0 /* f_ignore_case */, 1/* last */ TSRMLS_CC);
 
        if ( ret_pos >= 0 ) {
                RETURN_LONG(ret_pos);
@@ -382,7 +382,7 @@ PHP_FUNCTION(grapheme_strripos)
                /* else we need to continue via utf16 */
        }
 
-       ret_pos = grapheme_strrpos_utf16(haystack, haystack_len, needle, needle_len, offset, 1 /* f_ignore_case */ TSRMLS_CC);
+       ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, offset, NULL,  1 /* f_ignore_case */, 1 /*last */ TSRMLS_CC);
 
        if ( ret_pos >= 0 ) {
                RETURN_LONG(ret_pos);
@@ -659,7 +659,7 @@ static void strstr_common_handler(INTERNAL_FUNCTION_PARAMETERS, int f_ignore_cas
        }
 
        /* need to work in utf16 */
-       ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, 0, &uchar_pos, f_ignore_case TSRMLS_CC );
+       ret_pos = grapheme_strpos_utf16(haystack, haystack_len, needle, needle_len, 0, &uchar_pos, f_ignore_case, 0 /*last */ TSRMLS_CC );
 
        if ( ret_pos < 0 ) {
                RETURN_FALSE;
index 92008554d47b169b53f3d7a695517739513c850b..595f0cc0f9aa92d5057ec5205aec24ecdda1d1db 100644 (file)
@@ -28,6 +28,7 @@
 #include <unicode/ucol.h>
 #include <unicode/ustring.h>
 #include <unicode/ubrk.h>
+#include <unicode/usearch.h>
 
 #include "ext/standard/php_string.h"
 
@@ -47,49 +48,8 @@ grapheme_close_global_iterator( TSRMLS_D )
 }
 /* }}} */
 
-/* {{{ grapheme_intl_case_fold: convert string to lowercase */
-void
-grapheme_intl_case_fold(UChar** ptr_to_free, UChar **str, int32_t *str_len, UErrorCode *pstatus )
-{
-    UChar *dest;
-    int32_t dest_len, size_required;
-
-    /* allocate a destination string that is a bit larger than the src, hoping that is enough */
-    dest_len = (*str_len) + ( *str_len / 10 );
-    dest = (UChar*) eumalloc(dest_len);
-
-    *pstatus = U_ZERO_ERROR;
-    size_required = u_strFoldCase(dest, dest_len, *str, *str_len, U_FOLD_CASE_DEFAULT, pstatus);
-
-    dest_len = size_required;
-
-    if ( U_BUFFER_OVERFLOW_ERROR == *pstatus ) {
-
-        dest = (UChar*) eurealloc(dest, dest_len);
-
-        *pstatus = U_ZERO_ERROR;
-        size_required = u_strFoldCase(dest, dest_len, *str, *str_len, U_FOLD_CASE_DEFAULT, pstatus);
-    }
-
-    if ( U_FAILURE(*pstatus) ) {
-        return;
-    }
-
-    if ( NULL != ptr_to_free) {
-        efree(*ptr_to_free);
-        *ptr_to_free = dest;
-    }
-
-    *str = dest;
-    *str_len = dest_len;
-
-    return;
-}
-/* }}} */
-
 /* {{{ grapheme_substr_ascii f='from' - starting point, l='length' */
-void
-grapheme_substr_ascii(char *str, int str_len, int f, int l, int argc, char **sub_str, int *sub_str_len)
+void grapheme_substr_ascii(char *str, int str_len, int f, int l, int argc, char **sub_str, int *sub_str_len)
 {
     *sub_str = NULL;
 
@@ -147,225 +107,98 @@ grapheme_substr_ascii(char *str, int str_len, int f, int l, int argc, char **sub
 }
 /* }}} */
 
-/* {{{ grapheme_strrpos_utf16 - strrpos using utf16 */
-int
-grapheme_strrpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int f_ignore_case TSRMLS_DC)
-{
-    UChar *uhaystack, *puhaystack, *uhaystack_end, *uneedle;
-    int32_t uhaystack_len, uneedle_len;
-    UErrorCode status;
-    unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
-    UBreakIterator* bi = NULL;
-    int ret_pos, pos;
-
-    /* convert the strings to UTF-16. */
-    uhaystack = NULL;
-    uhaystack_len = 0;
-    status = U_ZERO_ERROR;
-    intl_convert_utf8_to_utf16(&uhaystack, &uhaystack_len, (char *) haystack, haystack_len, &status );
-
-    if ( U_FAILURE( status ) ) {
-        /* Set global error code. */
-        intl_error_set_code( NULL, status TSRMLS_CC );
-
-        /* Set error messages. */
-        intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
-        if (uhaystack) {
-                       efree( uhaystack );
-               }
-        return -1;
-    }
-
-    if ( f_ignore_case ) {
-        grapheme_intl_case_fold(&uhaystack, &uhaystack, &uhaystack_len, &status );
-    }
-
-    /* get a pointer to the haystack taking into account the offset */
-    bi = NULL;
-    status = U_ZERO_ERROR;
-    bi = grapheme_get_break_iterator(u_break_iterator_buffer, &status TSRMLS_CC );
-
-    puhaystack = grapheme_get_haystack_offset(bi, uhaystack, uhaystack_len, offset);
-
-    if ( NULL == puhaystack ) {
-        intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 TSRMLS_CC );
-        if (uhaystack) {
-                       efree( uhaystack );
-               }
-        ubrk_close (bi);
-        return -1;
-    }
-
-    uneedle = NULL;
-    uneedle_len = 0;
-    status = U_ZERO_ERROR;
-    intl_convert_utf8_to_utf16(&uneedle, &uneedle_len, (char *) needle, needle_len, &status );
-
-    if ( U_FAILURE( status ) ) {
-        /* Set global error code. */
-        intl_error_set_code( NULL, status TSRMLS_CC );
-
-        /* Set error messages. */
-        intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
-        if (uhaystack) {
-                       efree( uhaystack );
-               }
-               if (uneedle) {
-                       efree( uneedle );
-               }
-        ubrk_close (bi);
-        return -1;
-    }
-
-    if ( f_ignore_case ) {
-        grapheme_intl_case_fold(&uneedle, &uneedle, &uneedle_len, &status );
-    }
-
-    ret_pos = -1;   /* -1 represents 'not found' */
-
-    /* back up until there's needle_len characters to compare */
-
-    uhaystack_end = uhaystack + uhaystack_len;
-    pos = ubrk_last(bi);
-    puhaystack = uhaystack + pos;
-
-    while ( uhaystack_end - puhaystack < uneedle_len ) {
-
-        pos = ubrk_previous(bi);
-
-        if ( UBRK_DONE == pos ) {
-            break;
-        }
-
-        puhaystack = uhaystack + pos;
-    }
-
-    /* is there enough haystack left to hold the needle? */
-    if ( ( uhaystack_end - puhaystack ) < uneedle_len ) {
-        /* not enough, not found */
-        goto exit;
-    }
-
-    while ( UBRK_DONE != pos ) {
-
-        if (!u_memcmp(uneedle, puhaystack, uneedle_len)) {  /* needle_len - 1 in zend memnstr? */
-
-            /* does the grapheme in the haystack end at the same place as the last grapheme in the needle? */
-
-            if ( ubrk_isBoundary(bi, pos + uneedle_len) ) {
-
-                /* found it, get grapheme count offset */
-                ret_pos = grapheme_count_graphemes(bi, uhaystack, pos);
-                break;
-            }
-
-            /* set position back */
-            ubrk_isBoundary(bi, pos);
-        }
-
-        pos = ubrk_previous(bi);
-        puhaystack = uhaystack + pos;
-    }
-
-exit:
-       if (uhaystack) {
-               efree( uhaystack );
+#define STRPOS_CHECK_STATUS(status, error)                                                     \
+       if ( U_FAILURE( (status) ) ) {                                                                  \
+               intl_error_set_code( NULL, (status) TSRMLS_CC );                        \
+               intl_error_set_custom_msg( NULL, (error), 0 TSRMLS_CC );        \
+               if (uhaystack) {                                                                                        \
+                       efree( uhaystack );                                                                     \
+               }                                                                                                                       \
+               if (uneedle) {                                                                                          \
+                       efree( uneedle );                                                                               \
+               }                                                                                                                       \
+               if(bi) {                                                                                                        \
+                       ubrk_close (bi);                                                                                \
+               }                                                                                                                       \
+               if(src) {                                                                                                       \
+                       usearch_close(src);                                                                             \
+               }                                                                                                                       \
+               return -1;                                                                                                      \
        }
-       if (uneedle) {
-               efree( uneedle );
-       }
-    ubrk_close (bi);
 
-    return ret_pos;
-}
-
-/* }}} */
 
 /* {{{ grapheme_strpos_utf16 - strrpos using utf16*/
-int
-grapheme_strpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case TSRMLS_DC)
+int grapheme_strpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int32_t *puchar_pos, int f_ignore_case, int last TSRMLS_DC)
 {
-       UChar *uhaystack, *puhaystack, *uneedle;
-       int32_t uhaystack_len, uneedle_len;
-       int ret_pos;
+       UChar *uhaystack = NULL, *puhaystack, *uneedle = NULL;
+       int32_t uhaystack_len = 0, uneedle_len = 0, char_pos, ret_pos, offset_pos = 0;
        unsigned char u_break_iterator_buffer[U_BRK_SAFECLONE_BUFFERSIZE];
-       UBreakIterator* bi;
+       UBreakIterator* bi = NULL;
        UErrorCode status;
+       UStringSearch* src = NULL;
+       UCollator *coll;
 
-       *puchar_pos = -1;
-
+       if(puchar_pos) {
+               *puchar_pos = -1;
+       }
        /* convert the strings to UTF-16. */
 
-       uhaystack = NULL;
-       uhaystack_len = 0;
        status = U_ZERO_ERROR;
        intl_convert_utf8_to_utf16(&uhaystack, &uhaystack_len, (char *) haystack, haystack_len, &status );
+       STRPOS_CHECK_STATUS(status, "Error converting input string to UTF-16");
 
-       if ( U_FAILURE( status ) ) {
-               /* Set global error code. */
-               intl_error_set_code( NULL, status TSRMLS_CC );
-
-               /* Set error messages. */
-               intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
-               if (uhaystack) {
-                       efree( uhaystack );
-               }
-               return -1;
-       }
+       status = U_ZERO_ERROR;
+       intl_convert_utf8_to_utf16(&uneedle, &uneedle_len, (char *) needle, needle_len, &status );
+       STRPOS_CHECK_STATUS(status, "Error converting input string to UTF-16");
 
        /* get a pointer to the haystack taking into account the offset */
-       bi = NULL;
        status = U_ZERO_ERROR;
        bi = grapheme_get_break_iterator(u_break_iterator_buffer, &status TSRMLS_CC );
-       
-       puhaystack = grapheme_get_haystack_offset(bi, uhaystack, uhaystack_len, offset);
-       uhaystack_len = (uhaystack_len - ( puhaystack - uhaystack));
+       STRPOS_CHECK_STATUS(status, "Failed to get iterator");
 
-       if ( NULL == puhaystack ) {
-       
-               intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "grapheme_strpos: Offset not contained in string", 1 TSRMLS_CC );
-               if (uhaystack) {
-                       efree( uhaystack );
-               }
-               ubrk_close (bi);
-                                       
-               return -1;
-       }
+       status = U_ZERO_ERROR;
+       src = usearch_open(uneedle, uneedle_len, uhaystack, uhaystack_len, "", bi, &status);
+       STRPOS_CHECK_STATUS(status, "Error creating search object");
 
-       if ( f_ignore_case ) {
-               grapheme_intl_case_fold(&uhaystack, &puhaystack, &uhaystack_len, &status );
+       if(f_ignore_case) {
+               coll = usearch_getCollator(src);
+               status = U_ZERO_ERROR;
+               ucol_setAttribute(coll, UCOL_STRENGTH, UCOL_SECONDARY, &status);
+               STRPOS_CHECK_STATUS(status, "Error setting collation strength");
+               usearch_reset(src);
        }
 
-       uneedle = NULL;
-       uneedle_len = 0;
-       status = U_ZERO_ERROR;
-       intl_convert_utf8_to_utf16(&uneedle, &uneedle_len, (char *) needle, needle_len, &status );
+       if(offset != 0) {
+               offset_pos = grapheme_get_haystack_offset(bi, uhaystack, uhaystack_len, offset);
+               if(offset_pos == -1) {
+                       status = U_ILLEGAL_ARGUMENT_ERROR;
+                       STRPOS_CHECK_STATUS(status, "Invalid search offset");   
+               }
+               status = U_ZERO_ERROR;
+               usearch_setOffset(src, offset_pos, &status);    
+               STRPOS_CHECK_STATUS(status, "Invalid search offset");
+       }
 
-       if ( U_FAILURE( status ) ) {
-               /* Set global error code. */
-               intl_error_set_code( NULL, status TSRMLS_CC );
 
-               /* Set error messages. */
-               intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
-               if (uhaystack) {
-                       efree( uhaystack );
+       if(last) {
+               char_pos = usearch_last(src, &status);
+               if(char_pos < offset_pos) {
+                       /* last match is before our start offset */
+                       char_pos = USEARCH_DONE;
                }
-               if (uneedle) {
-                       efree( uneedle );
-               }
-               ubrk_close (bi);
-               
-               return -1;
+       } else {
+               char_pos = usearch_next(src, &status);
        }
-
-       if ( f_ignore_case ) {
-               grapheme_intl_case_fold(&uneedle, &uneedle, &uneedle_len, &status );
+       STRPOS_CHECK_STATUS(status, "Error looking up string");
+       if(char_pos != USEARCH_DONE && ubrk_isBoundary(bi, char_pos)) {
+               ret_pos = grapheme_count_graphemes(bi, uhaystack, char_pos);
+               if(puchar_pos) {
+                       *puchar_pos = char_pos;
+               }
+       } else {
+               ret_pos = -1;
        }
 
-       ret_pos = grapheme_memnstr_grapheme(bi, puhaystack, uneedle, uneedle_len, puhaystack + uhaystack_len );
-       
-       *puchar_pos = ubrk_current(bi);
-
        if (uhaystack) {
                efree( uhaystack );
        }
@@ -373,6 +206,7 @@ grapheme_strpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned ch
                efree( uneedle );
        }
        ubrk_close (bi);
+       usearch_close (src);
 
        return ret_pos;
 }
@@ -432,8 +266,7 @@ int grapheme_split_string(const UChar *text, int32_t text_length, int boundary_a
 /* }}} */
 
 /* {{{ grapheme_count_graphemes */
-int32_t
-grapheme_count_graphemes(UBreakIterator *bi, UChar *string, int32_t string_len)
+int32_t grapheme_count_graphemes(UBreakIterator *bi, UChar *string, int32_t string_len)
 {
        int ret_len = 0;
        int pos = 0;
@@ -455,72 +288,9 @@ grapheme_count_graphemes(UBreakIterator *bi, UChar *string, int32_t string_len)
 }
 /* }}} */
 
-/* {{{ grapheme_memnstr_grapheme: find needle in haystack using grapheme boundaries */
-int32_t
-grapheme_memnstr_grapheme(UBreakIterator *bi, UChar *haystack, UChar *needle, int32_t needle_len, UChar *end)
-{
-       UChar *p = haystack;
-       UChar ne = needle[needle_len-1];
-       UErrorCode status;
-       int32_t grapheme_offset;
-       
-       end -= needle_len;
-
-       while (p <= end) {
-
-               if ((p = u_memchr(p, *needle, (end-p+1))) && ne == p[needle_len-1]) {
-
-                       if (!u_memcmp(needle, p, needle_len - 1)) {  /* needle_len - 1 works because if needle_len is 1, we've already tested the char */
-
-                               /* does the grapheme end here? */
-
-                               status = U_ZERO_ERROR;
-                               ubrk_setText (bi, haystack, (end - haystack) + needle_len, &status);
-
-                               if ( ubrk_isBoundary (bi, (p - haystack) + needle_len) ) {
-
-                                       /* found it, get grapheme count offset */
-                                       grapheme_offset = grapheme_count_graphemes(bi, haystack, (p - haystack));
-
-                                       return grapheme_offset;
-                               }
-                       }
-               }
-
-               if (p == NULL) {
-                       return -1;
-               }
-
-               p++;
-       }
-
-       return -1;
-}
-
-/* }}} */
-
-/* {{{ grapheme_memrstr_grapheme: reverse find needle in haystack using grapheme boundaries */
-inline void *grapheme_memrchr_grapheme(const void *s, int c, int32_t n)
-{
-       register unsigned char *e;
-
-       if (n <= 0) {
-               return NULL;
-       }
-
-       for (e = (unsigned char *)s + n - 1; e >= (unsigned char *)s; e--) {
-               if (*e == (unsigned char)c) {
-                       return (void *)e;
-               }
-       }
-
-       return NULL;
-}
-/* }}} */
 
 /* {{{         grapheme_get_haystack_offset - bump the haystack pointer based on the grapheme count offset */
-UChar *
-grapheme_get_haystack_offset(UBreakIterator* bi, UChar *uhaystack, int32_t uhaystack_len, int32_t offset)
+int grapheme_get_haystack_offset(UBreakIterator* bi, UChar *uhaystack, int32_t uhaystack_len, int32_t offset)
 {
        UErrorCode              status;
        int32_t pos;
@@ -533,7 +303,7 @@ grapheme_get_haystack_offset(UBreakIterator* bi, UChar *uhaystack, int32_t uhays
        }
 
        if ( 0 == offset ) {
-               return uhaystack;
+               return 0;
        }
        
        if ( offset < 0 ) {
@@ -558,10 +328,10 @@ grapheme_get_haystack_offset(UBreakIterator* bi, UChar *uhaystack, int32_t uhays
        }
 
        if ( offset != 0 ) {
-               return NULL;
+               return -1;
        }
        
-       return uhaystack + pos;
+       return pos;
 }
 /* }}} */
 
@@ -607,8 +377,7 @@ grapheme_strrpos_ascii(unsigned char *haystack, int32_t haystack_len, unsigned c
 /* }}} */
 
 /* {{{ grapheme_get_break_iterator: get a clone of the global character break iterator */
-UBreakIterator* 
-grapheme_get_break_iterator(void *stack_buffer, UErrorCode *status TSRMLS_DC )
+UBreakIterator* grapheme_get_break_iterator(void *stack_buffer, UErrorCode *status TSRMLS_DC )
 {
        int32_t buffer_size;
 
index c91aeaff7dd12dbc25197db033460c1cc8a6979c..a2a1bc887371ee60f7a55d567ade7b03650e939f 100644 (file)
 /* get_break_interator: get a break iterator from the global structure */
 UBreakIterator* grapheme_get_break_iterator(void *stack_buffer, UErrorCode *status TSRMLS_DC );
 
-void
-grapheme_substr_ascii(char *str, int32_t str_len, int32_t f, int32_t l, int argc, char **sub_str, int *sub_str_len);
+void grapheme_substr_ascii(char *str, int32_t str_len, int32_t f, int32_t l, int argc, char **sub_str, int *sub_str_len);
 
-int
-grapheme_strrpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int f_ignore_case TSRMLS_DC);
+int grapheme_strrpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int f_ignore_case TSRMLS_DC);
 
-int
-grapheme_strpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int *puchar_pos, int f_ignore_case TSRMLS_DC);
+int grapheme_strpos_utf16(unsigned char *haystack, int32_t haystack_len, unsigned char*needle, int32_t needle_len, int32_t offset, int *puchar_pos, int f_ignore_case, int last TSRMLS_DC);
 
 int grapheme_ascii_check(const unsigned char *day, int32_t len);
 
 int grapheme_split_string(const UChar *text, int32_t text_length, int boundary_array[], int boundary_array_len TSRMLS_DC );
 
-int32_t
-grapheme_count_graphemes(UBreakIterator *bi, UChar *string, int32_t string_len);
-
-int32_t
-grapheme_memnstr_grapheme(UBreakIterator *bi, UChar *haystack, UChar *needle, int32_t needle_len, UChar *end);
+int32_t grapheme_count_graphemes(UBreakIterator *bi, UChar *string, int32_t string_len);
 
 inline void *grapheme_memrchr_grapheme(const void *s, int c, int32_t n);
 
-UChar *
-grapheme_get_haystack_offset(UBreakIterator* bi, UChar *uhaystack, int32_t uhaystack_len, int32_t offset);
+int grapheme_get_haystack_offset(UBreakIterator* bi, UChar *uhaystack, int32_t uhaystack_len, int32_t offset);
 
-int32_t
-grapheme_strrpos_ascii(unsigned char *haystack, int32_t haystack_len, unsigned char *needle, int32_t needle_len, int32_t offset);
+int32_t grapheme_strrpos_ascii(unsigned char *haystack, int32_t haystack_len, unsigned char *needle, int32_t needle_len, int32_t offset);
 
-UBreakIterator* 
-grapheme_get_break_iterator(void *stack_buffer, UErrorCode *status TSRMLS_DC );
+UBreakIterator* grapheme_get_break_iterator(void *stack_buffer, UErrorCode *status TSRMLS_DC );
 
 /* OUTSIDE_STRING: check if (possibly negative) long offset is outside the string with int32_t length */
 #define OUTSIDE_STRING(offset, max_len) ( offset <= INT32_MIN || offset > INT32_MAX || (offset < 0 ? -offset > (long) max_len : offset >= (long) max_len) )
diff --git a/ext/intl/tests/bug61860.phpt b/ext/intl/tests/bug61860.phpt
new file mode 100644 (file)
index 0000000..123d9ff
--- /dev/null
@@ -0,0 +1,18 @@
+--TEST--
+Bug #61860: Offsets may be wrong for grapheme_stri* functions
+--SKIPIF--
+<?php if( !extension_loaded( 'intl' ) ) print 'skip'; ?>
+--FILE--
+<?php
+$haystack = 'Auf der Straße nach Paris habe ich mit dem Fahrer gesprochen';
+var_dump(
+    grapheme_stripos($haystack, 'pariS '),
+    grapheme_stristr($haystack, 'paRis '),
+    grapheme_substr($haystack, grapheme_stripos($haystack, 'Paris'))
+);
+
+?>
+--EXPECT--
+int(20)
+string(40) "Paris habe ich mit dem Fahrer gesprochen"
+string(40) "Paris habe ich mit dem Fahrer gesprochen"