From: Xinchen Hui Date: Mon, 15 Jan 2018 13:01:27 +0000 (+0800) Subject: Optimized php_addslashes with SSE4.2 instructions X-Git-Tag: php-7.3.0alpha1~633 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=98aa3a65c4e68e4cd50ab62221a409e4b74cec65;p=php Optimized php_addslashes with SSE4.2 instructions According to the benchmark (https://gist.github.com/laruence/fd0d443d2c5bacca9d8ab99250499956), this reduces execution time by more than 30%. In the future, this opens the door to possible SSE4.2 optimizations in other places. --- diff --git a/acinclude.m4 b/acinclude.m4 index ff814fb059..347404bfae 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -3222,9 +3222,30 @@ AC_DEFUN([PHP_CHECK_BUILTIN_SSUBLL_OVERFLOW], [ ]) +dnl PHP_CHECK_BUILTIN_CPU_INIT +AC_DEFUN([PHP_CHECK_BUILTIN_CPU_INIT], [ + AC_MSG_CHECKING([for __builtin_cpu_init]) + + AC_TRY_LINK(, [ + return __builtin_cpu_init()? 1 : 0; + ], [ + have_builtin_cpu_init=1 + AC_MSG_RESULT([yes]) + ], [ + have_builtin_cpu_init=0 + AC_MSG_RESULT([no]) + ]) + + AC_DEFINE_UNQUOTED([PHP_HAVE_BUILTIN_CPU_INIT], + [$have_builtin_cpu_init], [Whether the compiler supports __builtin_cpu_init]) + +]) + dnl Load the AX_CHECK_COMPILE_FLAG macro from the autoconf archive. 
m4_include([build/ax_check_compile_flag.m4]) +m4_include([build/ax_gcc_func_attribute.m4]) + dnl PHP_CHECK_VALGRIND AC_DEFUN([PHP_CHECK_VALGRIND], [ AC_MSG_CHECKING([for valgrind]) diff --git a/build/ax_gcc_func_attribute.m4 b/build/ax_gcc_func_attribute.m4 new file mode 100644 index 0000000000..79478f56b8 --- /dev/null +++ b/build/ax_gcc_func_attribute.m4 @@ -0,0 +1,241 @@ +# =========================================================================== +# https://www.gnu.org/software/autoconf-archive/ax_gcc_func_attribute.html +# =========================================================================== +# +# SYNOPSIS +# +# AX_GCC_FUNC_ATTRIBUTE(ATTRIBUTE) +# +# DESCRIPTION +# +# This macro checks if the compiler supports one of GCC's function +# attributes; many other compilers also provide function attributes with +# the same syntax. Compiler warnings are used to detect supported +# attributes as unsupported ones are ignored by default so quieting +# warnings when using this macro will yield false positives. +# +# The ATTRIBUTE parameter holds the name of the attribute to be checked. +# +# If ATTRIBUTE is supported define HAVE_FUNC_ATTRIBUTE_. +# +# The macro caches its result in the ax_cv_have_func_attribute_ +# variable. 
+# +# The macro currently supports the following function attributes: +# +# alias +# aligned +# alloc_size +# always_inline +# artificial +# cold +# const +# constructor +# constructor_priority for constructor attribute with priority +# deprecated +# destructor +# dllexport +# dllimport +# error +# externally_visible +# fallthrough +# flatten +# format +# format_arg +# gnu_inline +# hot +# ifunc +# leaf +# malloc +# noclone +# noinline +# nonnull +# noreturn +# nothrow +# optimize +# pure +# sentinel +# sentinel_position +# unused +# used +# visibility +# warning +# warn_unused_result +# weak +# weakref +# +# Unsupported function attributes will be tested with a prototype +# returning an int and not accepting any arguments and the result of the +# check might be wrong or meaningless so use with care. +# +# LICENSE +# +# Copyright (c) 2013 Gabriele Svelto +# +# Copying and distribution of this file, with or without modification, are +# permitted in any medium without royalty provided the copyright notice +# and this notice are preserved. This file is offered as-is, without any +# warranty. 
+ +#serial 9 + +AC_DEFUN([AX_GCC_FUNC_ATTRIBUTE], [ + AS_VAR_PUSHDEF([ac_var], [ax_cv_have_func_attribute_$1]) + + AC_CACHE_CHECK([for __attribute__(($1))], [ac_var], [ + AC_LINK_IFELSE([AC_LANG_PROGRAM([ + m4_case([$1], + [alias], [ + int foo( void ) { return 0; } + int bar( void ) __attribute__(($1("foo"))); + ], + [aligned], [ + int foo( void ) __attribute__(($1(32))); + ], + [alloc_size], [ + void *foo(int a) __attribute__(($1(1))); + ], + [always_inline], [ + inline __attribute__(($1)) int foo( void ) { return 0; } + ], + [artificial], [ + inline __attribute__(($1)) int foo( void ) { return 0; } + ], + [cold], [ + int foo( void ) __attribute__(($1)); + ], + [const], [ + int foo( void ) __attribute__(($1)); + ], + [constructor_priority], [ + int foo( void ) __attribute__((__constructor__(65535/2))); + ], + [constructor], [ + int foo( void ) __attribute__(($1)); + ], + [deprecated], [ + int foo( void ) __attribute__(($1(""))); + ], + [destructor], [ + int foo( void ) __attribute__(($1)); + ], + [dllexport], [ + __attribute__(($1)) int foo( void ) { return 0; } + ], + [dllimport], [ + int foo( void ) __attribute__(($1)); + ], + [error], [ + int foo( void ) __attribute__(($1(""))); + ], + [externally_visible], [ + int foo( void ) __attribute__(($1)); + ], + [fallthrough], [ + int foo( void ) {switch (0) { case 1: __attribute__(($1)); case 2: break ; }}; + ], + [flatten], [ + int foo( void ) __attribute__(($1)); + ], + [format], [ + int foo(const char *p, ...) 
__attribute__(($1(printf, 1, 2))); + ], + [format_arg], [ + char *foo(const char *p) __attribute__(($1(1))); + ], + [gnu_inline], [ + inline __attribute__(($1)) int foo( void ) { return 0; } + ], + [hot], [ + int foo( void ) __attribute__(($1)); + ], + [ifunc], [ + int my_foo( void ) { return 0; } + static int (*resolve_foo(void))(void) { return my_foo; } + int foo( void ) __attribute__(($1("resolve_foo"))); + ], + [leaf], [ + __attribute__(($1)) int foo( void ) { return 0; } + ], + [malloc], [ + void *foo( void ) __attribute__(($1)); + ], + [noclone], [ + int foo( void ) __attribute__(($1)); + ], + [noinline], [ + __attribute__(($1)) int foo( void ) { return 0; } + ], + [nonnull], [ + int foo(char *p) __attribute__(($1(1))); + ], + [noreturn], [ + void foo( void ) __attribute__(($1)); + ], + [nothrow], [ + int foo( void ) __attribute__(($1)); + ], + [optimize], [ + __attribute__(($1(3))) int foo( void ) { return 0; } + ], + [pure], [ + int foo( void ) __attribute__(($1)); + ], + [sentinel], [ + int foo(void *p, ...) __attribute__(($1)); + ], + [sentinel_position], [ + int foo(void *p, ...) 
__attribute__(($1(1))); + ], + [returns_nonnull], [ + void *foo( void ) __attribute__(($1)); + ], + [unused], [ + int foo( void ) __attribute__(($1)); + ], + [used], [ + int foo( void ) __attribute__(($1)); + ], + [visibility], [ + int foo_def( void ) __attribute__(($1("default"))); + int foo_hid( void ) __attribute__(($1("hidden"))); + int foo_int( void ) __attribute__(($1("internal"))); + int foo_pro( void ) __attribute__(($1("protected"))); + ], + [warning], [ + int foo( void ) __attribute__(($1(""))); + ], + [warn_unused_result], [ + int foo( void ) __attribute__(($1)); + ], + [weak], [ + int foo( void ) __attribute__(($1)); + ], + [weakref], [ + static int foo( void ) { return 0; } + static int bar( void ) __attribute__(($1("foo"))); + ], + [target], [ + static int bar( void ) __attribute__(($1("sse2"))); + ], + [ + m4_warn([syntax], [Unsupported attribute $1, the test may fail]) + int foo( void ) __attribute__(($1)); + ] + )], []) + ], + dnl GCC doesn't exit with an error if an unknown attribute is + dnl provided but only outputs a warning, so accept the attribute + dnl only if no warning were issued. + [AS_IF([test -s conftest.err], + [AS_VAR_SET([ac_var], [no])], + [AS_VAR_SET([ac_var], [yes])])], + [AS_VAR_SET([ac_var], [no])]) + ]) + + AS_IF([test yes = AS_VAR_GET([ac_var])], + [AC_DEFINE_UNQUOTED(AS_TR_CPP(HAVE_FUNC_ATTRIBUTE_$1), 1, + [Define to 1 if the system has the `$1' function attribute])], []) + + AS_VAR_POPDEF([ac_var]) +]) diff --git a/configure.ac b/configure.ac index 61de0b48f6..bbf491cb3d 100644 --- a/configure.ac +++ b/configure.ac @@ -275,8 +275,7 @@ esac dnl Mark symbols hidden by default if the compiler (for example, gcc >= 4) dnl supports it. This can help reduce the binary size and startup time. 
-AX_CHECK_COMPILE_FLAG([-fvisibility=hidden], - [CFLAGS="$CFLAGS -fvisibility=hidden"]) +dnl AX_CHECK_COMPILE_FLAG([-fvisibility=hidden], [CFLAGS="$CFLAGS -fvisibility=hidden"]) case $host_alias in *solaris*) @@ -495,7 +494,8 @@ utime.h \ sys/utsname.h \ sys/ipc.h \ dlfcn.h \ -assert.h +assert.h \ +nmmintrin.h ],[],[],[ #ifdef HAVE_SYS_PARAM_H #include @@ -565,6 +565,8 @@ dnl Check __builtin_ssubl_overflow PHP_CHECK_BUILTIN_SSUBL_OVERFLOW dnl Check __builtin_ssubll_overflow PHP_CHECK_BUILTIN_SSUBLL_OVERFLOW +dnl Check __builtin_cpu_init +PHP_CHECK_BUILTIN_CPU_INIT dnl Check for members of the stat structure AC_STRUCT_ST_BLKSIZE @@ -585,6 +587,10 @@ AC_TYPE_UID_T dnl Checks for sockaddr_storage and sockaddr.sa_len PHP_SOCKADDR_CHECKS +AC_MSG_CHECKING([checking building environment]) +AX_GCC_FUNC_ATTRIBUTE([ifunc]) +AX_GCC_FUNC_ATTRIBUTE([target]) + dnl Check for IPv6 support AC_CACHE_CHECK([for IPv6 support], ac_cv_ipv6_support, [AC_TRY_LINK([ #include diff --git a/ext/standard/string.c b/ext/standard/string.c index 97a20fb295..0c491dde0c 100644 --- a/ext/standard/string.c +++ b/ext/standard/string.c @@ -34,6 +34,7 @@ #ifdef HAVE_MONETARY_H # include #endif + /* * This define is here because some versions of libintl redefine setlocale * to point to libintl_setlocale. 
That's a ridiculous thing to do as far @@ -3863,10 +3864,36 @@ PHPAPI zend_string *php_addcslashes(zend_string *str, int should_free, char *wha } /* }}} */ -/* {{{ php_addslashes +/* {{{ php_addslashes */ +#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && HAVE_FUNC_ATTRIBUTE_IFUNC && HAVE_FUNC_ATTRIBUTE_TARGET && HAVE_NMMINTRIN_H + +#include +#include "Zend/zend_bitset.h" + +PHPAPI zend_string *php_addslashes(zend_string *str, int should_free) __attribute__((ifunc("resolve_addslashes"))); + +zend_string *php_addslashes_sse4(zend_string *str, int should_free) __attribute__((target("sse4.2"))); +zend_string *php_addslashes_default(zend_string *str, int should_free); + +/* {{{ resolve_addslashes */ +static void *resolve_addslashes() { +#if PHP_HAVE_BUILTIN_CPU_INIT + __builtin_cpu_init(); + if (__builtin_cpu_supports("sse4.2")) { + return php_addslashes_sse4; + } +#endif + return php_addslashes_default; +} +/* }}} */ + +/* {{{ php_addslashes_sse4 */ -PHPAPI zend_string *php_addslashes(zend_string *str, int should_free) +zend_string *php_addslashes_sse4(zend_string *str, int should_free) { + SET_ALIGNED(16, static const char slashchars[16]) = "\'\"\\\0"; + __m128i w128, s128; + uint32_t res = 0; /* maximum string length, worst case situation */ char *source, *target; char *end; @@ -3880,6 +3907,34 @@ PHPAPI zend_string *php_addslashes(zend_string *str, int should_free) source = ZSTR_VAL(str); end = source + ZSTR_LEN(str); + if (ZSTR_LEN(str) > 15) { + char *aligned = (char*)(((zend_uintptr_t)source + 15) & ~15); + + if (UNEXPECTED(source != aligned)) { + do { + switch (*source) { + case '\0': + case '\'': + case '\"': + case '\\': + goto do_escape; + default: + source++; + break; + } + } while (source < aligned); + } + + w128 = _mm_load_si128((__m128i *)slashchars); + for (;end - source > 15; source += 16) { + s128 = _mm_load_si128((__m128i *)source); + res = _mm_cvtsi128_si32(_mm_cmpestrm(w128, 4, s128, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | 
_SIDD_BIT_MASK)); + if (res) { + goto do_escape; + } + } + } + while (source < end) { switch (*source) { case '\0': @@ -3905,6 +3960,83 @@ do_escape: memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset); target = ZSTR_VAL(new_str) + offset; + if (res) { + int pos = 0; + do { + int i, n = zend_ulong_ntz(res); + for (i = 0; i < n; i++) { + *target++ = source[pos + i]; + } + pos += n; + *target++ = '\\'; + if (source[pos] == '\0') { + *target++ = '0'; + } else { + *target++ = source[pos]; + } + pos++; + res = res >> (n + 1); + } while (res); + + for (; pos < 16; pos++) { + *target++ = source[pos]; + } + source += 16; + } else if (end - source > 15) { + char *aligned = (char*)(((zend_uintptr_t)source + 15) & ~15); + + if (source != aligned) { + do { + switch (*source) { + case '\0': + *target++ = '\\'; + *target++ = '0'; + break; + case '\'': + case '\"': + case '\\': + *target++ = '\\'; + /* break is missing *intentionally* */ + default: + *target++ = *source; + break; + } + source++; + } while (source < aligned); + } + + w128 = _mm_load_si128((__m128i *)slashchars); + } + + for (; end - source > 15; source += 16) { + int pos = 0; + s128 = _mm_load_si128((__m128i *)source); + res = _mm_cvtsi128_si32(_mm_cmpestrm(w128, 4, s128, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK)); + if (res) { + do { + int i, n = zend_ulong_ntz(res); + for (i = 0; i < n; i++) { + *target++ = source[pos + i]; + } + pos += n; + *target++ = '\\'; + if (source[pos] == '\0') { + *target++ = '0'; + } else { + *target++ = source[pos]; + } + pos++; + res = res >> (n + 1); + } while (res); + for (; pos < 16; pos++) { + *target++ = source[pos]; + } + } else { + _mm_storeu_si128((__m128i*)target, s128); + target += 16; + } + } + while (source < end) { switch (*source) { case '\0': @@ -3920,11 +4052,88 @@ do_escape: *target++ = *source; break; } + source++; + } + + *target = '\0'; + if (should_free) { + zend_string_release(str); + } + + if (ZSTR_LEN(new_str) - (target - ZSTR_VAL(new_str)) > 
16) { + new_str = zend_string_truncate(new_str, target - ZSTR_VAL(new_str), 0); + } else { + ZSTR_LEN(new_str) = target - ZSTR_VAL(new_str); + } + + return new_str; +} +/* }}} */ + +/* {{{ php_addslashes_default + */ +zend_string *php_addslashes_default(zend_string *str, int should_free) +#else +zend_string *php_addslashes(zend_string *str, int should_free) +#endif +{ + /* maximum string length, worst case situation */ + char *source, *target; + char *end; + size_t offset; + zend_string *new_str; + + if (!str) { + return ZSTR_EMPTY_ALLOC(); + } + + source = ZSTR_VAL(str); + end = source + ZSTR_LEN(str); + + while (source < end) { + switch (*source) { + case '\0': + case '\'': + case '\"': + case '\\': + goto do_escape; + default: + source++; + break; + } + } + + if (!should_free) { + return zend_string_copy(str); + } + + return str; + +do_escape: + offset = source - (char *)ZSTR_VAL(str); + new_str = zend_string_safe_alloc(2, ZSTR_LEN(str) - offset, offset, 0); + memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset); + target = ZSTR_VAL(new_str) + offset; + while (source < end) { + switch (*source) { + case '\0': + *target++ = '\\'; + *target++ = '0'; + break; + case '\'': + case '\"': + case '\\': + *target++ = '\\'; + /* break is missing *intentionally* */ + default: + *target++ = *source; + break; + } source++; } - *target = 0; + *target = '\0'; if (should_free) { zend_string_release(str); } @@ -3938,6 +4147,7 @@ do_escape: return new_str; } /* }}} */ +/* }}} */ #define _HEB_BLOCK_TYPE_ENG 1 #define _HEB_BLOCK_TYPE_HEB 2