From 4fe3d108af2b0a1e8810959fe1ea7a0b52291ad7 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 19 Mar 2019 13:06:21 +0100 Subject: [PATCH] Don't create a new array for empty/null match every time If PREG_OFFSET_CAPTURE is used, unmatched subpatterns will be either [null, -1] or ['', -1] depending on PREG_UNMATCHED_AS_NULL mode. Instead of creating a new array like this every time, cache it inside a global (per-request -- could make it immutable though). Additionally check whether the subpattern is an empty string or single character string and use an existing interned string in that case. Empty / single-char subpatterns are common, so let's avoid allocating strings for them. --- ext/pcre/php_pcre.c | 78 ++++++++++++++++++++++++++++++++++----------- ext/pcre/php_pcre.h | 3 ++ 2 files changed, 62 insertions(+), 19 deletions(-) diff --git a/ext/pcre/php_pcre.c b/ext/pcre/php_pcre.c index 106936e936..04e5d858c3 100644 --- a/ext/pcre/php_pcre.c +++ b/ext/pcre/php_pcre.c @@ -259,6 +259,8 @@ static PHP_GINIT_FUNCTION(pcre) /* {{{ */ pcre_globals->backtrack_limit = 0; pcre_globals->recursion_limit = 0; pcre_globals->error_code = PHP_PCRE_NO_ERROR; + ZVAL_UNDEF(&pcre_globals->unmatched_null_pair); + ZVAL_UNDEF(&pcre_globals->unmatched_empty_pair); #ifdef HAVE_PCRE_JIT_SUPPORT pcre_globals->jit = 1; #endif @@ -460,6 +462,15 @@ static PHP_RINIT_FUNCTION(pcre) /* }}} */ #endif +static PHP_RSHUTDOWN_FUNCTION(pcre) +{ + zval_ptr_dtor(&PCRE_G(unmatched_null_pair)); + zval_ptr_dtor(&PCRE_G(unmatched_empty_pair)); + ZVAL_UNDEF(&PCRE_G(unmatched_null_pair)); + ZVAL_UNDEF(&PCRE_G(unmatched_empty_pair)); + return SUCCESS; +} + /* {{{ static pcre_clean_cache */ static int pcre_clean_cache(zval *data, void *arg) { @@ -937,26 +948,57 @@ PHPAPI void php_pcre_free_match_data(pcre2_match_data *match_data) } }/*}}}*/ +static void init_unmatched_null_pair() { + zval tmp; + zval *pair = &PCRE_G(unmatched_null_pair); + array_init_size(pair, 2); + ZVAL_NULL(&tmp); + zend_hash_next_index_insert_new(Z_ARRVAL_P(pair), &tmp); + ZVAL_LONG(&tmp, -1); + zend_hash_next_index_insert_new(Z_ARRVAL_P(pair), &tmp); +} + +static void init_unmatched_empty_pair() { + zval tmp; + zval *pair = &PCRE_G(unmatched_empty_pair); + array_init_size(pair, 2); + ZVAL_EMPTY_STRING(&tmp); + zend_hash_next_index_insert_new(Z_ARRVAL_P(pair), &tmp); + ZVAL_LONG(&tmp, -1); + zend_hash_next_index_insert_new(Z_ARRVAL_P(pair), &tmp); +} + /* {{{ add_offset_pair */ static inline void add_offset_pair(zval *result, char *str, size_t len, PCRE2_SIZE offset, zend_string *name, uint32_t unmatched_as_null) { zval match_pair, tmp; - array_init_size(&match_pair, 2); - /* Add (match, offset) to the return value */ if (PCRE2_UNSET == offset) { if (unmatched_as_null) { - ZVAL_NULL(&tmp); + if (Z_ISUNDEF(PCRE_G(unmatched_null_pair))) { + init_unmatched_null_pair(); + } + ZVAL_COPY(&match_pair, &PCRE_G(unmatched_null_pair)); } else { - ZVAL_EMPTY_STRING(&tmp); + if (Z_ISUNDEF(PCRE_G(unmatched_empty_pair))) { + init_unmatched_empty_pair(); + } + ZVAL_COPY(&match_pair, &PCRE_G(unmatched_empty_pair)); } } else { - ZVAL_STRINGL(&tmp, str, len); + array_init_size(&match_pair, 2); + if (len == 0) { + ZVAL_EMPTY_STRING(&tmp); + } else if (len == 1) { + ZVAL_INTERNED_STR(&tmp, ZSTR_CHAR((unsigned char) *str)); + } else { + ZVAL_STRINGL(&tmp, str, len); + } + zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp); + ZVAL_LONG(&tmp, offset); + zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp); } - zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp); - ZVAL_LONG(&tmp, offset); - zend_hash_next_index_insert_new(Z_ARRVAL(match_pair), &tmp); if (name) { Z_ADDREF(match_pair); @@ -975,6 +1017,10 @@ static inline void populate_match_value( } else { ZVAL_EMPTY_STRING(val); } + } else if (start_offset == end_offset) { + ZVAL_EMPTY_STRING(val); + } else if (start_offset + 1 == end_offset) { + ZVAL_INTERNED_STR(val, ZSTR_CHAR((unsigned char) subject[start_offset])); } else { ZVAL_STRINGL(val, subject + start_offset, end_offset - start_offset); } @@ -1223,16 +1269,10 @@ matched: } } else { for (i = 0; i < count; i++) { - if (PCRE2_UNSET == offsets[i<<1]) { - if (unmatched_as_null) { - add_next_index_null(&match_sets[i]); - } else { - add_next_index_str(&match_sets[i], ZSTR_EMPTY_ALLOC()); - } - } else { - add_next_index_stringl(&match_sets[i], subject + offsets[i<<1], - offsets[(i<<1)+1] - offsets[i<<1]); - } + zval val; + populate_match_value( + &val, subject, offsets[2*i], offsets[2*i+1], unmatched_as_null); + zend_hash_next_index_insert_new(Z_ARRVAL(match_sets[i]), &val); } } mark = pcre2_get_mark(match_data); @@ -2955,7 +2995,7 @@ zend_module_entry pcre_module_entry = { #else NULL, #endif - NULL, + PHP_RSHUTDOWN(pcre), PHP_MINFO(pcre), PHP_PCRE_VERSION, PHP_MODULE_GLOBALS(pcre), diff --git a/ext/pcre/php_pcre.h b/ext/pcre/php_pcre.h index 0377ce77a8..eca3ca5fdf 100644 --- a/ext/pcre/php_pcre.h +++ b/ext/pcre/php_pcre.h @@ -75,6 +75,9 @@ ZEND_BEGIN_MODULE_GLOBALS(pcre) zend_bool jit; #endif int error_code; + /* Used for unmatched subpatterns in OFFSET_CAPTURE mode */ + zval unmatched_null_pair; + zval unmatched_empty_pair; ZEND_END_MODULE_GLOBALS(pcre) PHPAPI ZEND_EXTERN_MODULE_GLOBALS(pcre) -- 2.40.0