. openssl_random_pseudo_bytes() now throws in error conditions.
(Sammy Kaye Powers)
+- PCRE:
+ . Fixed bug #72685 (Repeated UTF-8 validation of same string in UTF-8 mode).
+ (Nikita)
+
- PDO_OCI:
. Support Oracle Database tracing attributes ACTION, MODULE,
CLIENT_INFO, and CLIENT_IDENTIFIER. (Cameron Porter)
which improves performance of this function if it can be statically
resolved. In namespaced code, this may require writing \array_key_exists()
or explicitly importing the function.
+
+- PCRE:
+ . When preg_match() in UTF-8 mode ("u" modifier) is repeatedly called on the
+ same string (but possibly at different offsets), the string will only be
+ checked for UTF-8 validity once.
(str) = (zend_string *)do_alloca(ZEND_MM_ALIGNED_SIZE_EX(_ZSTR_STRUCT_SIZE(_len), 8), (use_heap)); \
GC_SET_REFCOUNT(str, 1); \
GC_TYPE_INFO(str) = IS_STRING; \
- zend_string_forget_hash_val(str); \
+ ZSTR_H(str) = 0; \
ZSTR_LEN(str) = _len; \
} while (0)
static zend_always_inline void zend_string_forget_hash_val(zend_string *s)
{
ZSTR_H(s) = 0;
+ GC_DEL_FLAGS(s, IS_STR_VALID_UTF8); /* also clear the cached "valid UTF-8" flag; presumably callers use this before altering the bytes -- TODO confirm */
}
static zend_always_inline uint32_t zend_string_refcount(const zend_string *s)
GC_SET_REFCOUNT(ret, 1);
GC_TYPE_INFO(ret) = IS_STRING | ((persistent ? IS_STR_PERSISTENT : 0) << GC_FLAGS_SHIFT);
- zend_string_forget_hash_val(ret);
+ ZSTR_H(ret) = 0;
ZSTR_LEN(ret) = len;
return ret;
}
GC_SET_REFCOUNT(ret, 1);
GC_TYPE_INFO(ret) = IS_STRING | ((persistent ? IS_STR_PERSISTENT : 0) << GC_FLAGS_SHIFT);
- zend_string_forget_hash_val(ret);
+ ZSTR_H(ret) = 0;
ZSTR_LEN(ret) = (n * m) + l;
return ret;
}
#define IS_STR_INTERNED GC_IMMUTABLE /* interned string */
#define IS_STR_PERSISTENT GC_PERSISTENT /* allocated using malloc */
#define IS_STR_PERMANENT (1<<8) /* relives request boundary */
+#define IS_STR_VALID_UTF8 (1<<9) /* valid UTF-8 according to PCRE */
/* array flags */
#define IS_ARRAY_IMMUTABLE GC_IMMUTABLE
}
}
- options = (pce->compile_options & PCRE2_UTF) ? 0 : PCRE2_NO_UTF_CHECK;
+ options = (pce->compile_options & PCRE2_UTF) && !(GC_FLAGS(subject_str) & IS_STR_VALID_UTF8)
+ ? 0 : PCRE2_NO_UTF_CHECK;
/* Execute the regular expression. */
#ifdef HAVE_PCRE_JIT_SUPPORT
efree(subpat_names);
}
- /* Did we encounter an error? */
if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
+ /* If there was no error and we're in /u mode, remember that the string is valid UTF-8. */
+ if ((pce->compile_options & PCRE2_UTF) && !ZSTR_IS_INTERNED(subject_str)) {
+ GC_ADD_FLAGS(subject_str, IS_STR_VALID_UTF8);
+ }
+
RETVAL_LONG(matched);
} else {
RETVAL_FALSE;
--- /dev/null
+--TEST--
+Bug #72685: Same string is UTF-8 validated repeatedly
+--FILE--
+<?php
+
+$input_size = 64 * 1024;
+$str = str_repeat('a', $input_size);
+
+$start = microtime(true);
+$pos = 0;
+while (preg_match('/\G\w/u', $str, $m, 0, $pos)) ++$pos;
+$end = microtime(true);
+var_dump(($end - $start) < 0.5); // large margin, more like 0.05 in debug build
+
+?>
+--EXPECT--
+bool(true)