}
/* }}} */
+ /* {{{ static calculate_unit_length */
+ /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE_UTF8.
+  *
+  * pce   - cache entry whose compile_options select the mode.
+  * start - first byte of the character to measure.
+  *
+  * Returns 1 when PCRE_UTF8 is not set in pce->compile_options. Otherwise
+  * returns 1 plus the number of bytes directly after start that have the
+  * bit pattern 10xxxxxx (UTF-8 continuation bytes).
+  *
+  * The byte at start itself is never inspected, and no length is passed in:
+  * the scan stops only at the first following byte that is not 10xxxxxx
+  * (a zero byte qualifies), so the caller must guarantee such a byte exists
+  * inside the buffer. */
+ static zend_always_inline int calculate_unit_length(pcre_cache_entry *pce, char *start)
+ {
+ int unit_len;
+
+ if (pce->compile_options & PCRE_UTF8) {
+ char *end = start;
+
+ /* skip continuation bytes: the pre-increment steps past the lead byte
+  * before the first test, so the result is at least 1; the empty loop
+  * body is intentional */
+ while ((*++end & 0xC0) == 0x80);
+ unit_len = end - start;
+ } else {
+ unit_len = 1;
+ }
+ return unit_len;
+ }
+ /* }}} */
+
/* {{{ pcre_get_compiled_regex_cache
*/
-PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC)
+PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(zend_string *regex)
{
pcre *re = NULL;
pcre_extra *extra;
the start offset, and continue. Fudge the offset values
to achieve this, unless we're already at the end of the string. */
if (g_notempty != 0 && start_offset < subject_len) {
- offsets[0] = start_offset;
- offsets[1] = start_offset + unit_len;
+ int unit_len = calculate_unit_length(pce, subject + start_offset);
+
- offsets[1] = (int)(start_offset + 1);
+ offsets[0] = (int)start_offset;
++ offsets[1] = (int)(start_offset + unit_len);
} else
break;
} else {
the start offset, and continue. Fudge the offset values
to achieve this, unless we're already at the end of the string. */
if (g_notempty != 0 && start_offset < subject_len) {
+ int unit_len = calculate_unit_length(pce, piece);
+
offsets[0] = start_offset;
- offsets[1] = start_offset + 1;
- memcpy(&result->val[result_len], piece, 1);
- result_len++;
+ offsets[1] = start_offset + unit_len;
- memcpy(&result[*result_len], piece, unit_len);
- *result_len += unit_len;
++ memcpy(&result->val[result_len], piece, unit_len);
++ result_len += unit_len;
} else {
- new_len = *result_len + subject_len - start_offset;
- if (new_len + 1 > alloc_len) {
- alloc_len = new_len + 1; /* now we know exactly how long it is */
- new_buf = safe_emalloc(alloc_len, sizeof(char), 0);
- memcpy(new_buf, result, *result_len);
- efree(result);
- result = new_buf;
+ if (!result && subject_str) {
+ result = zend_string_copy(subject_str);
+ break;
+ }
+ new_len = result_len + subject_len - start_offset;
+ if (new_len > alloc_len) {
+ alloc_len = new_len; /* now we know exactly how long it is */
+ if (NULL != result) {
+ result = zend_string_realloc(result, alloc_len, 0);
+ } else {
+ result = zend_string_alloc(alloc_len, 0);
+ }
}
/* stick that last bit of string on our output */
- memcpy(&result[*result_len], piece, subject_len - start_offset);
- *result_len += subject_len - start_offset;
- result[*result_len] = '\0';
+ memcpy(&result->val[result_len], piece, subject_len - start_offset);
+ result_len += subject_len - start_offset;
+ result->val[result_len] = '\0';
+ result->len = result_len;
break;
}
} else {