/* NOT REACHED */
}
+/*
+ * Translate a character offset into a byte pointer inside a UTF-8 string.
+ *
+ * str    - string whose val/len bytes are walked.
+ * offset - character offset: zero or positive counts forward from the start
+ *          of the string, negative counts backward from its end.
+ *
+ * Returns a pointer within [str->val, str->val + str->len], or NULL when the
+ * string does not contain enough characters to satisfy the offset.
+ */
+static const unsigned char *mbfl_find_offset_utf8(const mbfl_string *str, ssize_t offset) {
+	const unsigned char *begin = str->val;
+	const unsigned char *end = str->val + str->len;
+
+	if (offset < 0) {
+		const unsigned char *pos = end;
+		while (offset < 0) {
+			if (pos <= begin) {
+				return NULL;
+			}
+
+			/* Every byte that is not a continuation byte (10xxxxxx) is counted
+			 * as the start of one character; this includes plain ASCII. */
+			unsigned char c = *(--pos);
+			if ((c & 0xc0) != 0x80) {
+				++offset;
+			}
+		}
+		return pos;
+	} else {
+		/* NOTE(review): mblen_table is presumably indexed by lead byte and
+		 * yields the sequence length in bytes - confirm against its definition. */
+		const unsigned char *u8_tbl = mbfl_encoding_utf8.mblen_table;
+		const unsigned char *pos = begin;
+		while (offset-- > 0) {
+			if (pos >= end) {
+				return NULL;
+			}
+
+			/* Clamp the step: a sequence truncated at the end of the buffer
+			 * must not move pos beyond end, which would form an out-of-range
+			 * pointer and hand it back to the caller. */
+			size_t step = u8_tbl[*pos];
+			if (step > (size_t)(end - pos)) {
+				pos = end;
+			} else {
+				pos += step;
+			}
+		}
+		return pos;
+	}
+}
+
size_t
mbfl_strpos(
mbfl_string *haystack,
size_t result;
mbfl_string _haystack_u8, _needle_u8;
const mbfl_string *haystack_u8, *needle_u8 = NULL;
- const unsigned char *u8_tbl = mbfl_encoding_utf8.mblen_table;
+ const unsigned char *offset_pointer;
if (haystack->encoding->no_encoding != mbfl_no_encoding_utf8) {
mbfl_string_init(&_haystack_u8);
needle_u8 = needle;
}
+ offset_pointer = mbfl_find_offset_utf8(haystack_u8, offset);
+ if (!offset_pointer) {
+ result = MBFL_ERROR_OFFSET;
+ goto out;
+ }
+
result = MBFL_ERROR_NOT_FOUND;
if (haystack_u8->len < needle_u8->len) {
goto out;
if (needle_u8->len == 0) {
size_t haystack_length = mbfl_strlen(haystack_u8);
- /* Check if offset is out of bound */
- if (
- (offset > 0 && offset > haystack_length)
- || (offset < 0 && -offset > haystack_length)
- ) {
- result = -16;
- goto out;
- }
-
if (offset < 0) {
result = haystack_length + offset;
} else if (reverse) {
jtbl[needle_u8_val[i]] = needle_u8_len - i;
}
e = haystack_u8_val + haystack_u8->len;
- p = haystack_u8_val;
- while (offset-- > 0) {
- if (p >= e) {
- result = MBFL_ERROR_OFFSET;
- goto out;
- }
- p += u8_tbl[*p];
- }
- p += needle_u8_len;
+ p = offset_pointer + needle_u8_len;
if (p > e) {
goto out;
}
}
}
} else {
- const unsigned char *ee = haystack_u8_val + haystack_u8->len;
- while (offset-- > 0) {
- if (e >= ee) {
- result = MBFL_ERROR_OFFSET;
- goto out;
- }
- e += u8_tbl[*e];
- }
+ e = offset_pointer;
}
if (p < e + needle_u8_len) {
goto out;
}
/* }}} */
+/*
+ * Report an error code as a PHP warning.
+ *
+ * MBFL_ERROR_NOT_FOUND is silent. MBFL_ERROR_ENCODING and MBFL_ERROR_OFFSET
+ * each raise their own E_WARNING, and any other value raises a generic
+ * "unknown error" E_WARNING.
+ */
+static void handle_strpos_error(size_t error) {
+	if (error == MBFL_ERROR_NOT_FOUND) {
+		/* A missing needle is an ordinary result, not something to warn about. */
+		return;
+	}
+
+	if (error == MBFL_ERROR_ENCODING) {
+		php_error_docref(NULL, E_WARNING, "Unknown encoding or conversion error");
+		return;
+	}
+
+	if (error == MBFL_ERROR_OFFSET) {
+		php_error_docref(NULL, E_WARNING, "Offset not contained in string");
+		return;
+	}
+
+	/* Anything else is an error code this function does not know about. */
+	php_error_docref(NULL, E_WARNING, "Unknown error in mb_strpos");
+}
+
/* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
Find position of first occurrence of a string within another */
PHP_FUNCTION(mb_strpos)
RETURN_FALSE;
}
- if (offset != 0) {
- size_t slen = mbfl_strlen(&haystack);
- if (offset < 0) {
- offset += slen;
- }
- if (offset < 0 || offset > slen) {
- php_error_docref(NULL, E_WARNING, "Offset not contained in string");
- RETURN_FALSE;
- }
- }
-
n = mbfl_strpos(&haystack, &needle, offset, reverse);
if (!mbfl_is_error(n)) {
RETVAL_LONG(n);
} else {
- switch (n) {
- case MBFL_ERROR_NOT_FOUND:
- break;
- case MBFL_ERROR_ENCODING:
- php_error_docref(NULL, E_WARNING, "Unknown encoding or conversion error");
- break;
- case MBFL_ERROR_OFFSET:
- php_error_docref(NULL, E_WARNING, "Offset not contained in string");
- break;
- default:
- php_error_docref(NULL, E_WARNING, "Unknown error in mb_strpos");
- break;
- }
+ handle_strpos_error(n);
RETVAL_FALSE;
}
}
RETURN_FALSE;
}
- if (offset != 0) {
- size_t haystack_char_len = mbfl_strlen(&haystack);
- if ((offset > 0 && offset > haystack_char_len) ||
- (offset < 0 && -offset > haystack_char_len)) {
- php_error_docref(NULL, E_WARNING, "Offset is greater than the length of haystack string");
- RETURN_FALSE;
- }
- }
-
n = mbfl_strpos(&haystack, &needle, offset, 1);
if (!mbfl_is_error(n)) {
RETVAL_LONG(n);
} else {
+ handle_strpos_error(n);
RETVAL_FALSE;
}
}
-- Offset is -25 --
Multibyte String:
-Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d
+Warning: mb_strrpos(): Offset not contained in string in %s on line %d
bool(false)
ASCII String:
mb_strrpos:
-Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d
+Warning: mb_strrpos(): Offset not contained in string in %s on line %d
bool(false)
strrpos:
Offset not contained in string
-- Offset is -24 --
Multibyte String:
-Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d
+Warning: mb_strrpos(): Offset not contained in string in %s on line %d
bool(false)
ASCII String:
mb_strrpos:
-Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d
+Warning: mb_strrpos(): Offset not contained in string in %s on line %d
bool(false)
strrpos:
Offset not contained in string
bool(false)
> Offset: 12
-Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d
+Warning: mb_strrpos(): Offset not contained in string in %s on line %d
bool(false)
> Offset: -1
int(8)
int(4)
> Offset: -20
-Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d
+Warning: mb_strrpos(): Offset not contained in string in %s on line %d
bool(false)
------- strripos -----------
--- /dev/null
+--TEST--
+Offset errors for various strpos functions
+--FILE--
+<?php
+
+var_dump(mb_strpos("f", "bar", 3));
+var_dump(mb_strpos("f", "bar", -3));
+var_dump(mb_strrpos("f", "bar", 3));
+var_dump(mb_strrpos("f", "bar", -3));
+var_dump(mb_stripos("f", "bar", 3));
+var_dump(mb_stripos("f", "bar", -3));
+var_dump(mb_strripos("f", "bar", 3));
+var_dump(mb_strripos("f", "bar", -3));
+
+?>
+--EXPECTF--
+Warning: mb_strpos(): Offset not contained in string in %s on line %d
+bool(false)
+
+Warning: mb_strpos(): Offset not contained in string in %s on line %d
+bool(false)
+
+Warning: mb_strrpos(): Offset not contained in string in %s on line %d
+bool(false)
+
+Warning: mb_strrpos(): Offset not contained in string in %s on line %d
+bool(false)
+
+Warning: mb_stripos(): Offset not contained in string in %s on line %d
+bool(false)
+
+Warning: mb_stripos(): Offset not contained in string in %s on line %d
+bool(false)
+
+Warning: mb_strripos(): Offset is greater than the length of haystack string in %s on line %d
+bool(false)
+
+Warning: mb_strripos(): Offset is greater than the length of haystack string in %s on line %d
+bool(false)
-- ASCII string with out of bound positive offset --
-Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d
+Warning: mb_strrpos(): Offset not contained in string in %s on line %d
bool(false)
-- ASCII string with out of bound negative offset --
-Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d
+Warning: mb_strrpos(): Offset not contained in string in %s on line %d
bool(false)
-- Multi-byte string without offset --
-- Multi-byte string with out of bound positive offset --
-Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d
+Warning: mb_strrpos(): Offset not contained in string in %s on line %d
bool(false)
-- Multi-byte string with out of bound negative offset --
-Warning: mb_strrpos(): Offset is greater than the length of haystack string in %s on line %d
+Warning: mb_strrpos(): Offset not contained in string in %s on line %d
bool(false)