zend_hash_next_index_insert_new(Z_ARRVAL_P(pair), &tmp);
}
+static zend_always_inline void populate_match_value_str( /* Set val to the string subject[start_offset, end_offset); performs no PCRE2_UNSET check itself. */
+ zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) {
+ if (start_offset == end_offset) { /* zero-length range */
+ ZVAL_EMPTY_STRING(val);
+ } else if (start_offset + 1 == end_offset) { /* exactly one char: use the ZSTR_CHAR interned string for that byte */
+ ZVAL_INTERNED_STR(val, ZSTR_CHAR((unsigned char) subject[start_offset]));
+ } else { /* general case: string of end_offset - start_offset chars starting at subject + start_offset */
+ ZVAL_STRINGL(val, subject + start_offset, end_offset - start_offset);
+ }
+}
+
+static inline void populate_match_value( /* Set val from subject[start_offset, end_offset), handling a PCRE2_UNSET start_offset first. */
+ zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
+ uint32_t unmatched_as_null) {
+ if (PCRE2_UNSET == start_offset) { /* no usable offset: nothing is read from subject on this path */
+ if (unmatched_as_null) { /* non-zero flag: represent the unset value as NULL */
+ ZVAL_NULL(val);
+ } else { /* flag is zero: represent the unset value as an empty string */
+ ZVAL_EMPTY_STRING(val);
+ }
+ } else { /* offset is set: delegate the string construction to populate_match_value_str() */
+ populate_match_value_str(val, subject, start_offset, end_offset);
+ }
+}
+
/* {{{ add_offset_pair */
-static inline void add_offset_pair(zval *result, char *str, size_t len, PCRE2_SIZE offset, zend_string *name, uint32_t unmatched_as_null)
+static inline void add_offset_pair(
+ zval *result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
+ zend_string *name, uint32_t unmatched_as_null)
{
zval match_pair, tmp;
/* Add (match, offset) to the return value */
- if (PCRE2_UNSET == offset) {
+ if (PCRE2_UNSET == start_offset) {
if (unmatched_as_null) {
if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) {
init_unmatched_null_pair();
}
} else {
array_init_size(&match_pair, 2);
- if (len == 0) {
- ZVAL_EMPTY_STRING(&tmp);
- } else if (len == 1) {
- ZVAL_INTERNED_STR(&tmp, ZSTR_CHAR((unsigned char) *str));
- } else {
- ZVAL_STRINGL(&tmp, str, len);
- }
+ populate_match_value_str(&tmp, subject, start_offset, end_offset);
zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
- ZVAL_LONG(&tmp, offset);
+ ZVAL_LONG(&tmp, start_offset);
zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp);
}
}
/* }}} */
-static inline void populate_match_value(
- zval *val, char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset,
- uint32_t unmatched_as_null) {
- if (PCRE2_UNSET == start_offset) {
- if (unmatched_as_null) {
- ZVAL_NULL(val);
- } else {
- ZVAL_EMPTY_STRING(val);
- }
- } else if (start_offset == end_offset) {
- ZVAL_EMPTY_STRING(val);
- } else if (start_offset + 1 == end_offset) {
- ZVAL_INTERNED_STR(val, ZSTR_CHAR((unsigned char) subject[start_offset]));
- } else {
- ZVAL_STRINGL(val, subject + start_offset, end_offset - start_offset);
- }
-}
-
static void populate_subpat_array(
zval *subpats, char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names,
uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) {
if (subpat_names) {
if (offset_capture) {
for (i = 0; i < count; i++) {
- add_offset_pair(subpats, subject + offsets[i<<1],
- offsets[(i<<1)+1] - offsets[i<<1],
- offsets[i<<1], subpat_names[i], unmatched_as_null);
+ add_offset_pair(
+ subpats, subject, offsets[2*i], offsets[2*i+1],
+ subpat_names[i], unmatched_as_null);
}
if (unmatched_as_null) {
for (i = count; i < num_subpats; i++) {
- add_offset_pair(subpats, NULL, 0, PCRE2_UNSET, subpat_names[i], 1);
+ add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, subpat_names[i], 1);
}
}
} else {
} else {
if (offset_capture) {
for (i = 0; i < count; i++) {
- add_offset_pair(subpats, subject + offsets[i<<1],
- offsets[(i<<1)+1] - offsets[i<<1],
- offsets[i<<1], NULL, unmatched_as_null);
+ add_offset_pair(
+ subpats, subject, offsets[2*i], offsets[2*i+1], NULL, unmatched_as_null);
}
if (unmatched_as_null) {
for (i = count; i < num_subpats; i++) {
- add_offset_pair(subpats, NULL, 0, PCRE2_UNSET, NULL, 1);
+ add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, NULL, 1);
}
}
} else {
/* For each subpattern, insert it into the appropriate array. */
if (offset_capture) {
for (i = 0; i < count; i++) {
- add_offset_pair(&match_sets[i], subject + offsets[i<<1],
- offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null);
+ add_offset_pair(
+ &match_sets[i], subject, offsets[2*i], offsets[2*i+1],
+ NULL, unmatched_as_null);
}
} else {
for (i = 0; i < count; i++) {
for (; i < num_subpats; i++) {
if (offset_capture) {
add_offset_pair(
- &match_sets[i], NULL, 0, PCRE2_UNSET,
+ &match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET,
NULL, unmatched_as_null);
} else if (unmatched_as_null) {
add_next_index_null(&match_sets[i]);
uint32_t options; /* Execution options */
int count; /* Count of matched subpatterns */
PCRE2_SIZE start_offset; /* Where the new search starts */
- PCRE2_SIZE next_offset; /* End of the last delimiter match + 1 */
char *last_match; /* Location of last match */
uint32_t no_empty; /* If NO_EMPTY flag is set */
uint32_t delim_capture; /* If delimiters should be captured */
uint32_t num_subpats; /* Number of captured subpatterns */
zval tmp;
pcre2_match_data *match_data;
+ char *subject = ZSTR_VAL(subject_str);
no_empty = flags & PREG_SPLIT_NO_EMPTY;
delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
/* Start at the beginning of the string */
start_offset = 0;
- next_offset = 0;
- last_match = ZSTR_VAL(subject_str);
+ last_match = subject;
PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
#ifdef HAVE_PCRE_JIT_SUPPORT
if ((pce->preg_options & PREG_JIT) && options) {
- count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
+ count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
PCRE2_NO_UTF_CHECK, match_data, mctx);
} else
#endif
- count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
+ count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
options, match_data, mctx);
while (1) {
break;
}
- if (!no_empty || &ZSTR_VAL(subject_str)[offsets[0]] != last_match) {
-
+ if (!no_empty || &subject[offsets[0]] != last_match) {
if (offset_capture) {
/* Add (match, offset) pair to the return value */
- add_offset_pair(return_value, last_match, (&ZSTR_VAL(subject_str)[offsets[0]]-last_match), next_offset, NULL, 0);
+ add_offset_pair(
+ return_value, subject, last_match - subject, offsets[0],
+ NULL, 0);
} else {
/* Add the piece to the return value */
- ZVAL_STRINGL(&tmp, last_match, &ZSTR_VAL(subject_str)[offsets[0]]-last_match);
+ ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match);
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
}
limit_val--;
}
- last_match = &ZSTR_VAL(subject_str)[offsets[1]];
- next_offset = offsets[1];
+ last_match = &subject[offsets[1]];
if (delim_capture) {
size_t i, match_len;
for (i = 1; i < count; i++) {
- match_len = offsets[(i<<1)+1] - offsets[i<<1];
+ match_len = offsets[2*i+1] - offsets[2*i];
/* If we have matched a delimiter */
if (!no_empty || match_len > 0) {
if (offset_capture) {
- add_offset_pair(return_value, &ZSTR_VAL(subject_str)[offsets[i<<1]], match_len, offsets[i<<1], NULL, 0);
+ add_offset_pair(
+ return_value, subject, offsets[2*i], offsets[2*i+1], NULL, 0);
} else {
- ZVAL_STRINGL(&tmp, &ZSTR_VAL(subject_str)[offsets[i<<1]], match_len);
+ ZVAL_STRINGL(&tmp, &subject[offsets[2*i]], match_len);
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
}
}
the match again at the same point. If this fails (picked up above) we
advance to the next character. */
if (start_offset == offsets[0]) {
- count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
+ count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx);
if (count >= 0) {
goto matched;
the start offset, and continue. Fudge the offset values
to achieve this, unless we're already at the end of the string. */
if (start_offset < ZSTR_LEN(subject_str)) {
- start_offset += calculate_unit_length(pce, ZSTR_VAL(subject_str) + start_offset);
+ start_offset += calculate_unit_length(pce, subject + start_offset);
} else {
break;
}
#ifdef HAVE_PCRE_JIT_SUPPORT
if (pce->preg_options & PREG_JIT) {
- count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
+ count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
PCRE2_NO_UTF_CHECK, match_data, mctx);
} else
#endif
- count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset,
+ count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset,
PCRE2_NO_UTF_CHECK, match_data, mctx);
}
if (match_data != mdata) {
}
last:
- start_offset = (last_match - ZSTR_VAL(subject_str)); /* the offset might have been incremented, but without further successful matches */
+ start_offset = (last_match - subject); /* the offset might have been incremented, but without further successful matches */
if (!no_empty || start_offset < ZSTR_LEN(subject_str)) {
if (offset_capture) {
/* Add the last (match, offset) pair to the return value */
- add_offset_pair(return_value, &ZSTR_VAL(subject_str)[start_offset], ZSTR_LEN(subject_str) - start_offset, start_offset, NULL, 0);
+ add_offset_pair(return_value, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0);
} else {
/* Add the last piece to the return value */
- if (last_match == ZSTR_VAL(subject_str)) {
+ if (last_match == subject) {
ZVAL_STR_COPY(&tmp, subject_str);
} else {
- ZVAL_STRINGL(&tmp, last_match, ZSTR_VAL(subject_str) + ZSTR_LEN(subject_str) - last_match);
+ ZVAL_STRINGL(&tmp, last_match, subject + ZSTR_LEN(subject_str) - last_match);
}
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp);
}