]> granicus.if.org Git - php/commitdiff
Optimized php_addslashes with SSE4.2 instructions
author: Xinchen Hui <laruence@gmail.com>
Mon, 15 Jan 2018 13:01:27 +0000 (21:01 +0800)
committer: Xinchen Hui <laruence@gmail.com>
Mon, 15 Jan 2018 13:17:50 +0000 (21:17 +0800)
According to the benchmark
(https://gist.github.com/laruence/fd0d443d2c5bacca9d8ab99250499956), this reduces execution time by more than 30%.

In the future, this opens the door to possible SSE4.2 optimizations in
other places.

acinclude.m4
build/ax_gcc_func_attribute.m4 [new file with mode: 0644]
configure.ac
ext/standard/string.c

index ff814fb0596efb40d6693096a7766ad8a9629f06..347404bfaea1fbcf14d791838c6ba6f6152e5e2f 100644 (file)
@@ -3222,9 +3222,30 @@ AC_DEFUN([PHP_CHECK_BUILTIN_SSUBLL_OVERFLOW], [
 
 ])
 
+dnl PHP_CHECK_BUILTIN_CPU_INIT
+dnl
+dnl Check whether a program that calls __builtin_cpu_init() links, and define
+dnl PHP_HAVE_BUILTIN_CPU_INIT to 1 when it does and to 0 when it does not.
+AC_DEFUN([PHP_CHECK_BUILTIN_CPU_INIT], [
+  AC_MSG_CHECKING([for __builtin_cpu_init])
+
+  dnl The builtin is documented as returning void. Call it as a plain
+  dnl statement so the probe does not depend on a return value.
+  AC_TRY_LINK(, [
+    __builtin_cpu_init();
+  ], [
+    have_builtin_cpu_init=1
+    AC_MSG_RESULT([yes])
+  ], [
+    have_builtin_cpu_init=0
+    AC_MSG_RESULT([no])
+  ])
+
+  AC_DEFINE_UNQUOTED([PHP_HAVE_BUILTIN_CPU_INIT],
+   [$have_builtin_cpu_init], [Whether the compiler supports __builtin_cpu_init])
+
+])
+
 dnl Load the AX_CHECK_COMPILE_FLAG macro from the autoconf archive.
 m4_include([build/ax_check_compile_flag.m4])
 
+m4_include([build/ax_gcc_func_attribute.m4])
+
 dnl PHP_CHECK_VALGRIND
 AC_DEFUN([PHP_CHECK_VALGRIND], [
   AC_MSG_CHECKING([for valgrind])
diff --git a/build/ax_gcc_func_attribute.m4 b/build/ax_gcc_func_attribute.m4
new file mode 100644 (file)
index 0000000..79478f5
--- /dev/null
@@ -0,0 +1,241 @@
+# ===========================================================================
+#  https://www.gnu.org/software/autoconf-archive/ax_gcc_func_attribute.html
+# ===========================================================================
+#
+# SYNOPSIS
+#
+#   AX_GCC_FUNC_ATTRIBUTE(ATTRIBUTE)
+#
+# DESCRIPTION
+#
+#   This macro checks if the compiler supports one of GCC's function
+#   attributes; many other compilers also provide function attributes with
+#   the same syntax. Compiler warnings are used to detect supported
+#   attributes as unsupported ones are ignored by default so quieting
+#   warnings when using this macro will yield false positives.
+#
+#   The ATTRIBUTE parameter holds the name of the attribute to be checked.
+#
+#   If ATTRIBUTE is supported define HAVE_FUNC_ATTRIBUTE_<ATTRIBUTE>.
+#
+#   The macro caches its result in the ax_cv_have_func_attribute_<attribute>
+#   variable.
+#
+#   The macro currently supports the following function attributes:
+#
+#    alias
+#    aligned
+#    alloc_size
+#    always_inline
+#    artificial
+#    cold
+#    const
+#    constructor
+#    constructor_priority for constructor attribute with priority
+#    deprecated
+#    destructor
+#    dllexport
+#    dllimport
+#    error
+#    externally_visible
+#    fallthrough
+#    flatten
+#    format
+#    format_arg
+#    gnu_inline
+#    hot
+#    ifunc
+#    leaf
+#    malloc
+#    noclone
+#    noinline
+#    nonnull
+#    noreturn
+#    nothrow
+#    optimize
+#    pure
+#    returns_nonnull
+#    sentinel
+#    sentinel_position
+#    target
+#    unused
+#    used
+#    visibility
+#    warning
+#    warn_unused_result
+#    weak
+#    weakref
+#
+#   Unsupported function attributes will be tested with a prototype
+#   returning an int and not accepting any arguments and the result of the
+#   check might be wrong or meaningless so use with care.
+#
+# LICENSE
+#
+#   Copyright (c) 2013 Gabriele Svelto <gabriele.svelto@gmail.com>
+#
+#   Copying and distribution of this file, with or without modification, are
+#   permitted in any medium without royalty provided the copyright notice
+#   and this notice are preserved.  This file is offered as-is, without any
+#   warranty.
+
+#serial 9
+
+dnl AX_GCC_FUNC_ATTRIBUTE(ATTRIBUTE)
+dnl
+dnl Link a small test program that declares a function carrying ATTRIBUTE.
+dnl The attribute counts as supported only when the link succeeds and
+dnl conftest.err is empty. The yes/no answer is cached in
+dnl ax_cv_have_func_attribute_<ATTRIBUTE>; on yes, the preprocessor symbol
+dnl HAVE_FUNC_ATTRIBUTE_<ATTRIBUTE> is defined to 1.
+AC_DEFUN([AX_GCC_FUNC_ATTRIBUTE], [
+    dnl ac_var is a temporary alias for the per-attribute cache variable.
+    AS_VAR_PUSHDEF([ac_var], [ax_cv_have_func_attribute_$1])
+
+    AC_CACHE_CHECK([for __attribute__(($1))], [ac_var], [
+        AC_LINK_IFELSE([AC_LANG_PROGRAM([
+            m4_case([$1],
+                [alias], [
+                    int foo( void ) { return 0; }
+                    int bar( void ) __attribute__(($1("foo")));
+                ],
+                [aligned], [
+                    int foo( void ) __attribute__(($1(32)));
+                ],
+                [alloc_size], [
+                    void *foo(int a) __attribute__(($1(1)));
+                ],
+                [always_inline], [
+                    inline __attribute__(($1)) int foo( void ) { return 0; }
+                ],
+                [artificial], [
+                    inline __attribute__(($1)) int foo( void ) { return 0; }
+                ],
+                [cold], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [const], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [constructor_priority], [
+                    int foo( void ) __attribute__((__constructor__(65535/2)));
+                ],
+                [constructor], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [deprecated], [
+                    int foo( void ) __attribute__(($1("")));
+                ],
+                [destructor], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [dllexport], [
+                    __attribute__(($1)) int foo( void ) { return 0; }
+                ],
+                [dllimport], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [error], [
+                    int foo( void ) __attribute__(($1("")));
+                ],
+                [externally_visible], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [fallthrough], [
+                    int foo( void ) {switch (0) { case 1: __attribute__(($1)); case 2: break ; }};
+                ],
+                [flatten], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [format], [
+                    int foo(const char *p, ...) __attribute__(($1(printf, 1, 2)));
+                ],
+                [format_arg], [
+                    char *foo(const char *p) __attribute__(($1(1)));
+                ],
+                [gnu_inline], [
+                    inline __attribute__(($1)) int foo( void ) { return 0; }
+                ],
+                [hot], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [ifunc], [
+                    int my_foo( void ) { return 0; }
+                    static int (*resolve_foo(void))(void) { return my_foo; }
+                    int foo( void ) __attribute__(($1("resolve_foo")));
+                ],
+                [leaf], [
+                    __attribute__(($1)) int foo( void ) { return 0; }
+                ],
+                [malloc], [
+                    void *foo( void ) __attribute__(($1));
+                ],
+                [noclone], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [noinline], [
+                    __attribute__(($1)) int foo( void ) { return 0; }
+                ],
+                [nonnull], [
+                    int foo(char *p) __attribute__(($1(1)));
+                ],
+                [noreturn], [
+                    void foo( void ) __attribute__(($1));
+                ],
+                [nothrow], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [optimize], [
+                    __attribute__(($1(3))) int foo( void ) { return 0; }
+                ],
+                [pure], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [sentinel], [
+                    int foo(void *p, ...) __attribute__(($1));
+                ],
+                [sentinel_position], [
+                    int foo(void *p, ...) __attribute__(($1(1)));
+                ],
+                [returns_nonnull], [
+                    void *foo( void ) __attribute__(($1));
+                ],
+                [unused], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [used], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [visibility], [
+                    int foo_def( void ) __attribute__(($1("default")));
+                    int foo_hid( void ) __attribute__(($1("hidden")));
+                    int foo_int( void ) __attribute__(($1("internal")));
+                    int foo_pro( void ) __attribute__(($1("protected")));
+                ],
+                [warning], [
+                    int foo( void ) __attribute__(($1("")));
+                ],
+                [warn_unused_result], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [weak], [
+                    int foo( void ) __attribute__(($1));
+                ],
+                [weakref], [
+                    static int foo( void ) { return 0; }
+                    static int bar( void ) __attribute__(($1("foo")));
+                ],
+                [target], [
+                    static int bar( void ) __attribute__(($1("sse2")));
+                ],
+                [
+                 m4_warn([syntax], [Unsupported attribute $1, the test may fail])
+                 int foo( void ) __attribute__(($1));
+                ]
+            )], [])
+            ],
+            dnl GCC doesn't exit with an error if an unknown attribute is
+            dnl provided but only outputs a warning, so accept the attribute
+            dnl only if no warning were issued.
+            [AS_IF([test -s conftest.err],
+                [AS_VAR_SET([ac_var], [no])],
+                [AS_VAR_SET([ac_var], [yes])])],
+            [AS_VAR_SET([ac_var], [no])])
+    ])
+
+    dnl Define the HAVE_FUNC_ATTRIBUTE_* symbol only when the cached answer is yes.
+    AS_IF([test yes = AS_VAR_GET([ac_var])],
+        [AC_DEFINE_UNQUOTED(AS_TR_CPP(HAVE_FUNC_ATTRIBUTE_$1), 1,
+            [Define to 1 if the system has the `$1' function attribute])], [])
+
+    AS_VAR_POPDEF([ac_var])
+])
index 61de0b48f68dfb29e681c6684630d0bb544e60f5..bbf491cb3de559c0a4f9406dd6458db504e0f130 100644 (file)
@@ -275,8 +275,7 @@ esac
 
 dnl Mark symbols hidden by default if the compiler (for example, gcc >= 4)
 dnl supports it. This can help reduce the binary size and startup time.
-AX_CHECK_COMPILE_FLAG([-fvisibility=hidden],
-                       [CFLAGS="$CFLAGS -fvisibility=hidden"])
+dnl AX_CHECK_COMPILE_FLAG([-fvisibility=hidden], [CFLAGS="$CFLAGS -fvisibility=hidden"])
 
 case $host_alias in
   *solaris*)
@@ -495,7 +494,8 @@ utime.h \
 sys/utsname.h \
 sys/ipc.h \
 dlfcn.h \
-assert.h
+assert.h \
+nmmintrin.h
 ],[],[],[
 #ifdef HAVE_SYS_PARAM_H
 #include <sys/param.h>
@@ -565,6 +565,8 @@ dnl Check __builtin_ssubl_overflow
 PHP_CHECK_BUILTIN_SSUBL_OVERFLOW
 dnl Check __builtin_ssubll_overflow
 PHP_CHECK_BUILTIN_SSUBLL_OVERFLOW
+dnl Check __builtin_cpu_init
+PHP_CHECK_BUILTIN_CPU_INIT
 
 dnl Check for members of the stat structure
 AC_STRUCT_ST_BLKSIZE
@@ -585,6 +587,10 @@ AC_TYPE_UID_T
 dnl Checks for sockaddr_storage and sockaddr.sa_len
 PHP_SOCKADDR_CHECKS
 
+dnl Check for the ifunc and target function attributes.
+AX_GCC_FUNC_ATTRIBUTE([ifunc])
+AX_GCC_FUNC_ATTRIBUTE([target])
+
 dnl Check for IPv6 support
 AC_CACHE_CHECK([for IPv6 support], ac_cv_ipv6_support,
 [AC_TRY_LINK([ #include <sys/types.h>
index 97a20fb2953de944a8832dcc5bfc8a9268010fbb..0c491dde0ca7f99c2762dee4caa9ca47b9d7354e 100644 (file)
@@ -34,6 +34,7 @@
 #ifdef HAVE_MONETARY_H
 # include <monetary.h>
 #endif
+
 /*
  * This define is here because some versions of libintl redefine setlocale
  * to point to libintl_setlocale.  That's a ridiculous thing to do as far
@@ -3863,10 +3864,36 @@ PHPAPI zend_string *php_addcslashes(zend_string *str, int should_free, char *wha
 }
 /* }}} */
 
-/* {{{ php_addslashes
+/* {{{ php_addslashes */
+#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && HAVE_FUNC_ATTRIBUTE_IFUNC && HAVE_FUNC_ATTRIBUTE_TARGET && HAVE_NMMINTRIN_H
+
+#include <nmmintrin.h>
+#include "Zend/zend_bitset.h"
+
+/* php_addslashes is declared as an ifunc whose resolver is resolve_addslashes. */
+PHPAPI zend_string *php_addslashes(zend_string *str, int should_free) __attribute__((ifunc("resolve_addslashes")));
+
+/* The two implementations the resolver chooses between; only the sse4 one
+ * carries the target("sse4.2") attribute. */
+zend_string *php_addslashes_sse4(zend_string *str, int should_free) __attribute__((target("sse4.2")));
+zend_string *php_addslashes_default(zend_string *str, int should_free);
+
+/* {{{ resolve_addslashes
+ * Resolver named by the ifunc attribute on php_addslashes. Returns the
+ * address of the implementation to use: php_addslashes_sse4 when
+ * PHP_HAVE_BUILTIN_CPU_INIT is set and the CPU reports SSE4.2 support,
+ * php_addslashes_default otherwise.
+ */
+static void *resolve_addslashes(void) {
+#if PHP_HAVE_BUILTIN_CPU_INIT
+       /* Run the compiler's CPU detection before querying feature support. */
+       __builtin_cpu_init();
+       if (__builtin_cpu_supports("sse4.2")) {
+               return php_addslashes_sse4;
+       }
+#endif
+       return php_addslashes_default;
+}
+/* }}} */
+
+/* {{{ php_addslashes_sse4
  */
-PHPAPI zend_string *php_addslashes(zend_string *str, int should_free)
+zend_string *php_addslashes_sse4(zend_string *str, int should_free)
 {
+       SET_ALIGNED(16, static const char slashchars[16]) = "\'\"\\\0";
+       __m128i w128, s128;
+       uint32_t res = 0;
        /* maximum string length, worst case situation */
        char *source, *target;
        char *end;
@@ -3880,6 +3907,34 @@ PHPAPI zend_string *php_addslashes(zend_string *str, int should_free)
        source = ZSTR_VAL(str);
        end = source + ZSTR_LEN(str);
 
+       if (ZSTR_LEN(str) > 15) {
+               char *aligned = (char*)(((zend_uintptr_t)source + 15) & ~15);
+
+               if (UNEXPECTED(source != aligned)) {
+                       do {
+                               switch (*source) {
+                                       case '\0':
+                                       case '\'':
+                                       case '\"':
+                                       case '\\':
+                                               goto do_escape;
+                                       default:
+                                               source++;
+                                               break;
+                               }
+                       } while (source < aligned);
+               }
+
+               w128 = _mm_load_si128((__m128i *)slashchars);
+               for (;end - source > 15; source += 16) {
+                       s128 = _mm_load_si128((__m128i *)source);
+                       res = _mm_cvtsi128_si32(_mm_cmpestrm(w128, 4, s128, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK));
+                       if (res) {
+                               goto do_escape;
+                       }
+               }
+       }
+
        while (source < end) {
                switch (*source) {
                        case '\0':
@@ -3905,6 +3960,83 @@ do_escape:
        memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset);
        target = ZSTR_VAL(new_str) + offset;
 
+       if (res) {
+               int pos = 0;
+               do {
+                       int i, n = zend_ulong_ntz(res);
+                       for (i = 0; i < n; i++) {
+                               *target++ = source[pos + i];
+                       }
+                       pos += n;
+                       *target++ = '\\';
+                       if (source[pos] == '\0') {
+                               *target++ = '0';
+                       } else {
+                               *target++ = source[pos];
+                       }
+                       pos++;
+                       res = res >> (n + 1);
+               } while (res);
+
+               for (; pos < 16; pos++) {
+                       *target++ = source[pos];
+               }
+               source += 16;
+       } else if (end - source > 15) {
+               char *aligned = (char*)(((zend_uintptr_t)source + 15) & ~15);
+
+               if (source != aligned) {
+                       do {
+                               switch (*source) {
+                                       case '\0':
+                                               *target++ = '\\';
+                                               *target++ = '0';
+                                               break;
+                                       case '\'':
+                                       case '\"':
+                                       case '\\':
+                                               *target++ = '\\';
+                                               /* break is missing *intentionally* */
+                                       default:
+                                               *target++ = *source;
+                                               break;
+                               }
+                               source++;
+                       } while (source < aligned);
+               }
+
+               w128 = _mm_load_si128((__m128i *)slashchars);
+       }
+
+       for (; end - source > 15; source += 16) {
+               int pos = 0;
+               s128 = _mm_load_si128((__m128i *)source);
+               res = _mm_cvtsi128_si32(_mm_cmpestrm(w128, 4, s128, 16, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK));
+               if (res) {
+                       do {
+                               int i, n = zend_ulong_ntz(res);
+                               for (i = 0; i < n; i++) {
+                                       *target++ = source[pos + i];
+                               }
+                               pos += n;
+                               *target++ = '\\';
+                               if (source[pos] == '\0') {
+                                       *target++ = '0';
+                               } else {
+                                       *target++ = source[pos];
+                               }
+                               pos++;
+                               res = res >> (n + 1);
+                       } while (res);
+                       for (; pos < 16; pos++) {
+                               *target++ = source[pos];
+                       }
+               } else {
+                       _mm_storeu_si128((__m128i*)target, s128);
+                       target += 16;
+               }
+       }
+
        while (source < end) {
                switch (*source) {
                        case '\0':
@@ -3920,11 +4052,88 @@ do_escape:
                                *target++ = *source;
                                break;
                }
+               source++;
+       }
+
+       *target = '\0';
+       if (should_free) {
+               zend_string_release(str);
+       }
+
+       if (ZSTR_LEN(new_str) - (target - ZSTR_VAL(new_str)) > 16) {
+               new_str = zend_string_truncate(new_str, target - ZSTR_VAL(new_str), 0);
+       } else {
+               ZSTR_LEN(new_str) = target - ZSTR_VAL(new_str);
+       }
+
+       return new_str;
+}
+/* }}} */
+
+/* {{{ php_addslashes_default
+ */
+zend_string *php_addslashes_default(zend_string *str, int should_free)
+#else
+zend_string *php_addslashes(zend_string *str, int should_free)
+#endif
+{
+       /* maximum string length, worst case situation */
+       char *source, *target;
+       char *end;
+       size_t offset;
+       zend_string *new_str;
+
+       if (!str) {
+               return ZSTR_EMPTY_ALLOC();
+       }
+
+       source = ZSTR_VAL(str);
+       end = source + ZSTR_LEN(str);
+
+       while (source < end) {
+               switch (*source) {
+                       case '\0':
+                       case '\'':
+                       case '\"':
+                       case '\\':
+                               goto do_escape;
+                       default:
+                               source++;
+                               break;
+               }
+       }
+
+       if (!should_free) {
+               return zend_string_copy(str);
+       }
+
+       return str;
+
+do_escape:
+       offset = source - (char *)ZSTR_VAL(str);
+       new_str = zend_string_safe_alloc(2, ZSTR_LEN(str) - offset, offset, 0);
+       memcpy(ZSTR_VAL(new_str), ZSTR_VAL(str), offset);
+       target = ZSTR_VAL(new_str) + offset;
 
+       while (source < end) {
+               switch (*source) {
+                       case '\0':
+                               *target++ = '\\';
+                               *target++ = '0';
+                               break;
+                       case '\'':
+                       case '\"':
+                       case '\\':
+                               *target++ = '\\';
+                               /* break is missing *intentionally* */
+                       default:
+                               *target++ = *source;
+                               break;
+               }
                source++;
        }
 
-       *target = 0;
+       *target = '\0';
        if (should_free) {
                zend_string_release(str);
        }
@@ -3938,6 +4147,7 @@ do_escape:
        return new_str;
 }
 /* }}} */
+/* }}} */
 
 #define _HEB_BLOCK_TYPE_ENG 1
 #define _HEB_BLOCK_TYPE_HEB 2