From: Nikita Popov Date: Thu, 21 Mar 2019 09:08:29 +0000 (+0100) Subject: Cleanup add_offset_pair API X-Git-Tag: php-7.4.0alpha1~694 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=621b1f0312a559bce667ccf4be0a346a56e368a2;p=php Cleanup add_offset_pair API Accept the two offsets directly, rather than doing length calculations at all callsites. Also extract the logic to create a possibly interned string. Switch the split implementation to work on a char* subject internally, because ZSTR_VAL(subject_str) is a mouthful... --- diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 701c7f00d2..30f85437f5 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -968,13 +968,40 @@ static void init_unmatched_empty_pair() { zend_hash_next_index_insert_new(Z_ARRVAL_P(pair), &tmp); } +static zend_always_inline void populate_match_value_str( + zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset) { + if (start_offset == end_offset) { + ZVAL_EMPTY_STRING(val); + } else if (start_offset + 1 == end_offset) { + ZVAL_INTERNED_STR(val, ZSTR_CHAR((unsigned char) subject[start_offset])); + } else { + ZVAL_STRINGL(val, subject + start_offset, end_offset - start_offset); + } +} + +static inline void populate_match_value( + zval *val, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset, + uint32_t unmatched_as_null) { + if (PCRE2_UNSET == start_offset) { + if (unmatched_as_null) { + ZVAL_NULL(val); + } else { + ZVAL_EMPTY_STRING(val); + } + } else { + populate_match_value_str(val, subject, start_offset, end_offset); + } +} + /* {{{ add_offset_pair */ -static inline void add_offset_pair(zval *result, char *str, size_t len, PCRE2_SIZE offset, zend_string *name, uint32_t unmatched_as_null) +static inline void add_offset_pair( + zval *result, const char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset, + zend_string *name, uint32_t unmatched_as_null) { zval match_pair, tmp; /* Add (match, offset) to the return value */ - if (PCRE2_UNSET == offset) { + if (PCRE2_UNSET == start_offset) { if (unmatched_as_null) { if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) { init_unmatched_null_pair(); @@ -988,15 +1015,9 @@ static inline void add_offset_pair(zval *result, char *str, size_t len, PCRE2_SI } } else { array_init_size(&match_pair, 2); - if (len == 0) { - ZVAL_EMPTY_STRING(&tmp); - } else if (len == 1) { - ZVAL_INTERNED_STR(&tmp, ZSTR_CHAR((unsigned char) *str)); - } else { - ZVAL_STRINGL(&tmp, str, len); - } + populate_match_value_str(&tmp, subject, start_offset, end_offset); zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp); - ZVAL_LONG(&tmp, offset); + ZVAL_LONG(&tmp, start_offset); zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp); } @@ -1008,24 +1029,6 @@ static inline void add_offset_pair(zval *result, char *str, size_t len, PCRE2_SI } /* }}} */ -static inline void populate_match_value( - zval *val, char *subject, PCRE2_SIZE start_offset, PCRE2_SIZE end_offset, - uint32_t unmatched_as_null) { - if (PCRE2_UNSET == start_offset) { - if (unmatched_as_null) { - ZVAL_NULL(val); - } else { - ZVAL_EMPTY_STRING(val); - } - } else if (start_offset == end_offset) { - ZVAL_EMPTY_STRING(val); - } else if (start_offset + 1 == end_offset) { - ZVAL_INTERNED_STR(val, ZSTR_CHAR((unsigned char) subject[start_offset])); - } else { - ZVAL_STRINGL(val, subject + start_offset, end_offset - start_offset); - } -} - static void populate_subpat_array( zval *subpats, char *subject, PCRE2_SIZE *offsets, zend_string **subpat_names, uint32_t num_subpats, int count, const PCRE2_SPTR mark, zend_long flags) { @@ -1036,13 +1039,13 @@ static void populate_subpat_array( if (subpat_names) { if (offset_capture) { for (i = 0; i < count; i++) { - add_offset_pair(subpats, subject + offsets[i<<1], - offsets[(i<<1)+1] - offsets[i<<1], - offsets[i<<1], subpat_names[i], unmatched_as_null); + add_offset_pair( + subpats, subject, offsets[2*i], offsets[2*i+1], + subpat_names[i], unmatched_as_null); } if (unmatched_as_null) { for (i = count; i < num_subpats; i++) { - add_offset_pair(subpats, NULL, 0, PCRE2_UNSET, subpat_names[i], 1); + add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, subpat_names[i], 1); } } } else { @@ -1068,13 +1071,12 @@ static void populate_subpat_array( } else { if (offset_capture) { for (i = 0; i < count; i++) { - add_offset_pair(subpats, subject + offsets[i<<1], - offsets[(i<<1)+1] - offsets[i<<1], - offsets[i<<1], NULL, unmatched_as_null); + add_offset_pair( + subpats, subject, offsets[2*i], offsets[2*i+1], NULL, unmatched_as_null); } if (unmatched_as_null) { for (i = count; i < num_subpats; i++) { - add_offset_pair(subpats, NULL, 0, PCRE2_UNSET, NULL, 1); + add_offset_pair(subpats, NULL, PCRE2_UNSET, PCRE2_UNSET, NULL, 1); } } } else { @@ -1288,8 +1290,9 @@ matched: /* For each subpattern, insert it into the appropriate array. */ if (offset_capture) { for (i = 0; i < count; i++) { - add_offset_pair(&match_sets[i], subject + offsets[i<<1], - offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL, unmatched_as_null); + add_offset_pair( + &match_sets[i], subject, offsets[2*i], offsets[2*i+1], + NULL, unmatched_as_null); } } else { for (i = 0; i < count; i++) { @@ -1316,7 +1319,7 @@ matched: for (; i < num_subpats; i++) { if (offset_capture) { add_offset_pair( - &match_sets[i], NULL, 0, PCRE2_UNSET, + &match_sets[i], NULL, PCRE2_UNSET, PCRE2_UNSET, NULL, unmatched_as_null); } else if (unmatched_as_null) { add_next_index_null(&match_sets[i]); @@ -2490,7 +2493,6 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, uint32_t options; /* Execution options */ int count; /* Count of matched subpatterns */ PCRE2_SIZE start_offset; /* Where the new search starts */ - PCRE2_SIZE next_offset; /* End of the last delimiter match + 1 */ char *last_match; /* Location of last match */ uint32_t no_empty; /* If NO_EMPTY flag is set */ uint32_t delim_capture; /* If delimiters should be captured */ @@ -2498,6 +2500,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, uint32_t num_subpats; /* Number of captured subpatterns */ zval tmp; pcre2_match_data *match_data; + char *subject = ZSTR_VAL(subject_str); no_empty = flags & PREG_SPLIT_NO_EMPTY; delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE; @@ -2511,8 +2514,7 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, /* Start at the beginning of the string */ start_offset = 0; - next_offset = 0; - last_match = ZSTR_VAL(subject_str); + last_match = subject; PCRE_G(error_code) = PHP_PCRE_NO_ERROR; @@ -2539,11 +2541,11 @@ PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, zend_string *subject_str, #ifdef HAVE_PCRE_JIT_SUPPORT if ((pce->preg_options & PREG_JIT) && options) { - count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset, + count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset, PCRE2_NO_UTF_CHECK, match_data, mctx); } else #endif - count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset, + count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset, options, match_data, mctx); while (1) { @@ -2563,14 +2565,15 @@ matched: break; } - if (!no_empty || &ZSTR_VAL(subject_str)[offsets[0]] != last_match) { - + if (!no_empty || &subject[offsets[0]] != last_match) { if (offset_capture) { /* Add (match, offset) pair to the return value */ - add_offset_pair(return_value, last_match, (&ZSTR_VAL(subject_str)[offsets[0]]-last_match), next_offset, NULL, 0); + add_offset_pair( + return_value, subject, last_match - subject, offsets[0], + NULL, 0); } else { /* Add the piece to the return value */ - ZVAL_STRINGL(&tmp, last_match, &ZSTR_VAL(subject_str)[offsets[0]]-last_match); + ZVAL_STRINGL(&tmp, last_match, &subject[offsets[0]]-last_match); zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp); } @@ -2579,19 +2582,19 @@ matched: limit_val--; } - last_match = &ZSTR_VAL(subject_str)[offsets[1]]; - next_offset = offsets[1]; + last_match = &subject[offsets[1]]; if (delim_capture) { size_t i, match_len; for (i = 1; i < count; i++) { - match_len = offsets[(i<<1)+1] - offsets[i<<1]; + match_len = offsets[2*i+1] - offsets[2*i]; /* If we have matched a delimiter */ if (!no_empty || match_len > 0) { if (offset_capture) { - add_offset_pair(return_value, &ZSTR_VAL(subject_str)[offsets[i<<1]], match_len, offsets[i<<1], NULL, 0); + add_offset_pair( + return_value, subject, offsets[2*i], offsets[2*i+1], NULL, 0); } else { - ZVAL_STRINGL(&tmp, &ZSTR_VAL(subject_str)[offsets[i<<1]], match_len); + ZVAL_STRINGL(&tmp, &subject[offsets[2*i]], match_len); zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp); } } @@ -2606,7 +2609,7 @@ matched: the match again at the same point. If this fails (picked up above) we advance to the next character. */ if (start_offset == offsets[0]) { - count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset, + count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset, PCRE2_NO_UTF_CHECK | PCRE2_NOTEMPTY_ATSTART | PCRE2_ANCHORED, match_data, mctx); if (count >= 0) { goto matched; @@ -2616,7 +2619,7 @@ matched: the start offset, and continue. Fudge the offset values to achieve this, unless we're already at the end of the string. */ if (start_offset < ZSTR_LEN(subject_str)) { - start_offset += calculate_unit_length(pce, ZSTR_VAL(subject_str) + start_offset); + start_offset += calculate_unit_length(pce, subject + start_offset); } else { break; } @@ -2640,11 +2643,11 @@ error: #ifdef HAVE_PCRE_JIT_SUPPORT if (pce->preg_options & PREG_JIT) { - count = pcre2_jit_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset, + count = pcre2_jit_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset, PCRE2_NO_UTF_CHECK, match_data, mctx); } else #endif - count = pcre2_match(pce->re, (PCRE2_SPTR)ZSTR_VAL(subject_str), ZSTR_LEN(subject_str), start_offset, + count = pcre2_match(pce->re, (PCRE2_SPTR)subject, ZSTR_LEN(subject_str), start_offset, PCRE2_NO_UTF_CHECK, match_data, mctx); } if (match_data != mdata) { @@ -2657,18 +2660,18 @@ error: } last: - start_offset = (last_match - ZSTR_VAL(subject_str)); /* the offset might have been incremented, but without further successful matches */ + start_offset = (last_match - subject); /* the offset might have been incremented, but without further successful matches */ if (!no_empty || start_offset < ZSTR_LEN(subject_str)) { if (offset_capture) { /* Add the last (match, offset) pair to the return value */ - add_offset_pair(return_value, &ZSTR_VAL(subject_str)[start_offset], ZSTR_LEN(subject_str) - start_offset, start_offset, NULL, 0); + add_offset_pair(return_value, subject, start_offset, ZSTR_LEN(subject_str), NULL, 0); } else { /* Add the last piece to the return value */ - if (last_match == ZSTR_VAL(subject_str)) { + if (last_match == subject) { ZVAL_STR_COPY(&tmp, subject_str); } else { - ZVAL_STRINGL(&tmp, last_match, ZSTR_VAL(subject_str) + ZSTR_LEN(subject_str) - last_match); + ZVAL_STRINGL(&tmp, last_match, subject + ZSTR_LEN(subject_str) - last_match); } zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &tmp); }