]> granicus.if.org Git - php/commitdiff
Optimized base64_encode/decode with SIMD instructions
author Xinchen Hui <laruence@gmail.com>
Mon, 12 Feb 2018 12:46:17 +0000 (20:46 +0800)
committer Xinchen Hui <laruence@gmail.com>
Mon, 12 Feb 2018 12:53:14 +0000 (20:53 +0800)
UPGRADING.INTERNALS
Zend/zend_cpuinfo.h
Zend/zend_portability.h
acinclude.m4
configure.ac
ext/standard/base64.c
ext/standard/base64.h
ext/standard/basic_functions.c
ext/standard/string.c

index ae1062675f410a8ff49e970e7c81ecb194b8441a..092df3d2d36917815a648a3b971af6763c0b0120 100644 (file)
@@ -104,6 +104,9 @@ PHP 7.3 INTERNALS UPGRADE NOTES
      point in the past.
 
   m. zend_cpu_supports() determines if a feature is supported by current cpu.
+     A series of inline zend_cpu_supports_xxx() functions is also added. They
+     are designed for ifunc resolver functions, as a resolver function should
+     not depend on any external function.
 
   n. IS_TYPE_COPYABLE flag is removed. IS_STRING zvals didn't need to be
      duplication by zval_copy_ctor(), ZVAL_DUP() and SEPARATE_ZVAL*() macros.
index d0d3a936e1175a58dcbe5eb3884ecb78c7a27b96..f0c298db6805ca258cd4e5a46881f8e614a0502d 100644 (file)
@@ -106,42 +106,42 @@ ZEND_API int zend_cpu_supports(zend_cpu_feature feature);
  * before all PLT symbols are resloved. in other words,
  * resolver functions should not depends any external
  * functions */
-static zend_always_inline int zend_cpu_support_sse2() {
+static zend_always_inline int zend_cpu_supports_sse2() {
 #if PHP_HAVE_BUILTIN_CPU_INIT
        __builtin_cpu_init();
 #endif
        return __builtin_cpu_supports("sse2");
 }
 
-static zend_always_inline int zend_cpu_support_sse3() {
+static zend_always_inline int zend_cpu_supports_ssse3() {
 #if PHP_HAVE_BUILTIN_CPU_INIT
        __builtin_cpu_init();
 #endif
-       return __builtin_cpu_supports("sse3");
+       return __builtin_cpu_supports("ssse3");
 }
 
-static zend_always_inline int zend_cpu_support_sse41() {
+static zend_always_inline int zend_cpu_supports_sse41() {
 #if PHP_HAVE_BUILTIN_CPU_INIT
        __builtin_cpu_init();
 #endif
        return __builtin_cpu_supports("sse4.1");
 }
 
-static zend_always_inline int zend_cpu_support_sse42() {
+static zend_always_inline int zend_cpu_supports_sse42() {
 #if PHP_HAVE_BUILTIN_CPU_INIT
        __builtin_cpu_init();
 #endif
        return __builtin_cpu_supports("sse4.2");
 }
 
-static zend_always_inline int zend_cpu_support_avx() {
+static zend_always_inline int zend_cpu_supports_avx() {
 #if PHP_HAVE_BUILTIN_CPU_INIT
        __builtin_cpu_init();
 #endif
        return __builtin_cpu_supports("avx");
 }
 
-static zend_always_inline int zend_cpu_support_avx2() {
+static zend_always_inline int zend_cpu_supports_avx2() {
 #if PHP_HAVE_BUILTIN_CPU_INIT
        __builtin_cpu_init();
 #endif
@@ -149,29 +149,28 @@ static zend_always_inline int zend_cpu_support_avx2() {
 }
 #else
 
-static zend_always_inline int zend_cpu_support_sse2() {
+static zend_always_inline int zend_cpu_supports_sse2() {
        return zend_cpu_supports(ZEND_CPU_FEATURE_SSE2);
 }
 
-static zend_always_inline int zend_cpu_support_sse3() {
-       return zend_cpu_supports(ZEND_CPU_FEATURE_SSE3);
+static zend_always_inline int zend_cpu_supports_ssse3() {
+       return zend_cpu_supports(ZEND_CPU_FEATURE_SSSE3);
 }
 
-static zend_always_inline int zend_cpu_support_sse41() {
+static zend_always_inline int zend_cpu_supports_sse41() {
        return zend_cpu_supports(ZEND_CPU_FEATURE_SSE41);
 }
 
-static zend_always_inline int zend_cpu_support_sse42() {
+static zend_always_inline int zend_cpu_supports_sse42() {
        return zend_cpu_supports(ZEND_CPU_FEATURE_SSE42);
 }
 
-static zend_always_inline int zend_cpu_support_avx() {
+static zend_always_inline int zend_cpu_supports_avx() {
        return zend_cpu_supports(ZEND_CPU_FEATURE_AVX);
 }
 
-static zend_always_inline int zend_cpu_support_avx2() {
-       /* TODO */
-       return 0;
+static zend_always_inline int zend_cpu_supports_avx2() {
+       return zend_cpu_supports(ZEND_CPU_FEATURE_AVX2);
 }
 
 #endif
index bd3e23edfe85ee89bfb212ecbb1a4cd0b90a08e0..abf7dcc5a4c74691d26a1d388a50c2d225419337 100644 (file)
@@ -520,10 +520,48 @@ static zend_always_inline double _zend_get_nan(void) /* {{{ */
 # define ZEND_INTRIN_HAVE_IFUNC_TARGET 1
 #endif
 
+#if (defined(__i386__) || defined(__x86_64__))
+# if PHP_HAVE_SSSE3_INSTRUCTIONS && defined(HAVE_TMMINTRIN_H)
+# define PHP_HAVE_SSSE3
+# endif
+
+# if PHP_HAVE_SSE4_2_INSTRUCTIONS && defined(HAVE_NMMINTRIN_H)
+# define PHP_HAVE_SSE4_2
+# endif
+
+# if PHP_HAVE_AVX2_INSTRUCTIONS && defined(HAVE_IMMINTRIN_H)
+# define PHP_HAVE_AVX2
+# endif
+#endif
+
+#ifdef __SSSE3__
+/* Instructions compiled directly. */
+# define ZEND_INTRIN_SSSE3_NATIVE 1
+#elif (defined(HAVE_FUNC_ATTRIBUTE_TARGET) && defined(PHP_HAVE_SSSE3)) || defined(ZEND_WIN32)
+/* Function resolved by ifunc or MINIT. */
+# define ZEND_INTRIN_SSSE3_RESOLVER 1
+#endif
+
+#if ZEND_INTRIN_HAVE_IFUNC_TARGET && (ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER)
+# define ZEND_INTRIN_SSSE3_FUNC_PROTO 1
+#elif ZEND_INTRIN_SSSE3_RESOLVER
+# define ZEND_INTRIN_SSSE3_FUNC_PTR 1
+#endif
+
+#if ZEND_INTRIN_SSSE3_RESOLVER
+# if defined(HAVE_FUNC_ATTRIBUTE_TARGET)
+#  define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3")))
+# else
+#  define ZEND_INTRIN_SSSE3_FUNC_DECL(func) func
+# endif
+#else
+# define ZEND_INTRIN_SSSE3_FUNC_DECL(func)
+#endif
+
 #ifdef __SSE4_2__
 /* Instructions compiled directly. */
 # define ZEND_INTRIN_SSE4_2_NATIVE 1
-#elif (defined(__i386__) || defined(__x86_64__)) && defined(HAVE_NMMINTRIN_H) || defined(ZEND_WIN32)
+#elif (defined(HAVE_FUNC_ATTRIBUTE_TARGET) && defined(PHP_HAVE_SSE4_2)) || defined(ZEND_WIN32)
 /* Function resolved by ifunc or MINIT. */
 # define ZEND_INTRIN_SSE4_2_RESOLVER 1
 #endif
@@ -544,6 +582,30 @@ static zend_always_inline double _zend_get_nan(void) /* {{{ */
 # define ZEND_INTRIN_SSE4_2_FUNC_DECL(func)
 #endif
 
+#ifdef __AVX2__
+/* Instructions compiled directly. */
+# define ZEND_INTRIN_AVX2_NATIVE 1
+#elif (defined(HAVE_FUNC_ATTRIBUTE_TARGET) && defined(PHP_HAVE_AVX2)) || defined(ZEND_WIN32)
+/* Function resolved by ifunc or MINIT. */
+# define ZEND_INTRIN_AVX2_RESOLVER 1
+#endif
+
+#if ZEND_INTRIN_HAVE_IFUNC_TARGET && (ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER)
+# define ZEND_INTRIN_AVX2_FUNC_PROTO 1
+#elif ZEND_INTRIN_AVX2_RESOLVER
+# define ZEND_INTRIN_AVX2_FUNC_PTR 1
+#endif
+
+#if ZEND_INTRIN_AVX2_RESOLVER
+# if defined(HAVE_FUNC_ATTRIBUTE_TARGET)
+#  define ZEND_INTRIN_AVX2_FUNC_DECL(func) ZEND_API func __attribute__((target("avx2")))
+# else
+#  define ZEND_INTRIN_AVX2_FUNC_DECL(func) func
+# endif
+#else
+# define ZEND_INTRIN_AVX2_FUNC_DECL(func)
+#endif
+
 /* Intrinsics macros end. */
 
 #ifdef ZEND_WIN32
index 5c6a5c5f8489d14d9b4ea74ebd27e22ae873aa80..b8902f094731eef89e871919e5071703ed9171f6 100644 (file)
@@ -3271,7 +3271,7 @@ AC_DEFUN([PHP_CHECK_BUILTIN_CPU_SUPPORTS], [
   AC_MSG_CHECKING([for __builtin_cpu_supports])
 
   AC_TRY_LINK(, [
-    return __builtin_cpu_supports("sse2")? 1 : 0;
+    return __builtin_cpu_supports("sse")? 1 : 0;
   ], [
     have_builtin_cpu_supports=1
     AC_MSG_RESULT([yes])
@@ -3282,7 +3282,28 @@ AC_DEFUN([PHP_CHECK_BUILTIN_CPU_SUPPORTS], [
 
   AC_DEFINE_UNQUOTED([PHP_HAVE_BUILTIN_CPU_SUPPORTS],
    [$have_builtin_cpu_supports], [Whether the compiler supports __builtin_cpu_supports])
+])
 
+dnl PHP_CHECK_CPU_SUPPORTS
+AC_DEFUN([PHP_CHECK_CPU_SUPPORTS], [
+  AC_REQUIRE([PHP_CHECK_BUILTIN_CPU_INIT])
+  AC_REQUIRE([PHP_CHECK_BUILTIN_CPU_SUPPORTS])
+  have_ext_instructions=0
+  if test $have_builtin_cpu_supports = 1; then
+    AC_MSG_CHECKING([for $1 instructions supports])
+    AC_TRY_RUN([
+int main() {
+       return __builtin_cpu_supports("$1")? 0 : 1;
+}
+    ], [
+      have_ext_instructions=1
+      AC_MSG_RESULT([yes])
+    ], [
+      AC_MSG_RESULT([no])
+    ])
+  fi
+  AC_DEFINE_UNQUOTED(AS_TR_CPP([PHP_HAVE_$1_INSTRUCTIONS]),
+   [$have_ext_instructions], [Whether the compiler supports $1 instructions])
 ])
 
 dnl Load the AX_CHECK_COMPILE_FLAG macro from the autoconf archive.
index 811faa97ed8eae5bb83c56eec7f816384c170eba..531e6b4244a3d53385443e9a60ec5364e4d9c655 100644 (file)
@@ -496,7 +496,9 @@ sys/utsname.h \
 sys/ipc.h \
 dlfcn.h \
 assert.h \
-nmmintrin.h
+tmmintrin.h \
+nmmintrin.h \
+immintrin.h
 ],[],[],[
 #ifdef HAVE_SYS_PARAM_H
 #include <sys/param.h>
@@ -571,6 +573,12 @@ PHP_CHECK_BUILTIN_CPU_INIT
 dnl Check __builtin_cpu_supports
 PHP_CHECK_BUILTIN_CPU_SUPPORTS
 
+dnl Check instructions
+PHP_CHECK_CPU_SUPPORTS([ssse3])
+PHP_CHECK_CPU_SUPPORTS([sse4.2])
+PHP_CHECK_CPU_SUPPORTS([avx])
+PHP_CHECK_CPU_SUPPORTS([avx2])
+
 dnl Check for members of the stat structure
 AC_STRUCT_ST_BLKSIZE
 dnl AC_STRUCT_ST_BLOCKS will screw QNX because fileblocks.o does not exists
@@ -590,7 +598,6 @@ AC_TYPE_UID_T
 dnl Checks for sockaddr_storage and sockaddr.sa_len
 PHP_SOCKADDR_CHECKS
 
-AC_MSG_CHECKING([checking building environment])
 AX_GCC_FUNC_ATTRIBUTE([ifunc])
 AX_GCC_FUNC_ATTRIBUTE([target])
 
index 06856b82216153da70475338169b9c397d497c84..ae2b11e188df89cdb22bf66704a56f5637ecd46d 100644 (file)
@@ -53,47 +53,439 @@ static const short base64_reverse_table[256] = {
 };
 /* }}} */
 
-PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length) /* {{{ */
+static zend_always_inline unsigned char *php_base64_encode_impl(const unsigned char *in, size_t inl, unsigned char *out) /* {{{ */
 {
-       const unsigned char *current = str;
-       unsigned char *p;
-       zend_string *result;
-
-       result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
-       p = (unsigned char *)ZSTR_VAL(result);
 
-       while (length > 2) { /* keep going until we have less than 24 bits */
-               *p++ = base64_table[current[0] >> 2];
-               *p++ = base64_table[((current[0] & 0x03) << 4) + (current[1] >> 4)];
-               *p++ = base64_table[((current[1] & 0x0f) << 2) + (current[2] >> 6)];
-               *p++ = base64_table[current[2] & 0x3f];
+       while (inl > 2) { /* keep going until we have less than 24 bits */
+               *out++ = base64_table[in[0] >> 2];
+               *out++ = base64_table[((in[0] & 0x03) << 4) + (in[1] >> 4)];
+               *out++ = base64_table[((in[1] & 0x0f) << 2) + (in[2] >> 6)];
+               *out++ = base64_table[in[2] & 0x3f];
 
-               current += 3;
-               length -= 3; /* we just handle 3 octets of data */
+               in += 3;
+               inl -= 3; /* we just handle 3 octets of data */
        }
 
        /* now deal with the tail end of things */
-       if (length != 0) {
-               *p++ = base64_table[current[0] >> 2];
-               if (length > 1) {
-                       *p++ = base64_table[((current[0] & 0x03) << 4) + (current[1] >> 4)];
-                       *p++ = base64_table[(current[1] & 0x0f) << 2];
-                       *p++ = base64_pad;
+       if (inl != 0) {
+               *out++ = base64_table[in[0] >> 2];
+               if (inl > 1) {
+                       *out++ = base64_table[((in[0] & 0x03) << 4) + (in[1] >> 4)];
+                       *out++ = base64_table[(in[1] & 0x0f) << 2];
+                       *out++ = base64_pad;
                } else {
-                       *p++ = base64_table[(current[0] & 0x03) << 4];
-                       *p++ = base64_pad;
-                       *p++ = base64_pad;
+                       *out++ = base64_table[(in[0] & 0x03) << 4];
+                       *out++ = base64_pad;
+                       *out++ = base64_pad;
                }
        }
-       *p = '\0';
 
-       ZSTR_LEN(result) = (p - (unsigned char *)ZSTR_VAL(result));
+       *out = '\0';
+
+       return out;
+}
+/* }}} */
+
+static zend_always_inline int php_base64_decode_impl(const unsigned char *in, size_t inl, unsigned char *out, size_t *outl, zend_bool strict) /* {{{ */
+{
+       int ch, i = 0, padding = 0, j = *outl;
+
+       /* run through the whole string, converting as we go */
+       while (inl-- > 0) {
+               ch = *in++;
+               if (ch == base64_pad) {
+                       padding++;
+                       continue;
+               }
+
+               ch = base64_reverse_table[ch];
+               if (!strict) {
+                       /* skip unknown characters and whitespace */
+                       if (ch < 0) {
+                               continue;
+                       }
+               } else {
+                       /* skip whitespace */
+                       if (ch == -1) {
+                               continue;
+                       }
+                       /* fail on bad characters or if any data follows padding */
+                       if (ch == -2 || padding) {
+                               goto fail;
+                       }
+               }
+
+               switch (i % 4) {
+                       case 0:
+                               out[j] = ch << 2;
+                               break;
+                       case 1:
+                               out[j++] |= ch >> 4;
+                               out[j] = (ch & 0x0f) << 4;
+                               break;
+                       case 2:
+                               out[j++] |= ch >>2;
+                               out[j] = (ch & 0x03) << 6;
+                               break;
+                       case 3:
+                               out[j++] |= ch;
+                               break;
+               }
+               i++;
+       }
+
+       /* fail if the input is truncated (only one char in last group) */
+       if (strict && i % 4 == 1) {
+               goto fail;
+       }
+
+       /* fail if the padding length is wrong (not VV==, VVV=), but accept zero padding
+        * RFC 4648: "In some circumstances, the use of padding [--] is not required" */
+       if (strict && padding && (padding > 2 || (i + padding) % 4 != 0)) {
+               goto fail;
+       }
+
+       *outl = j;
+       out[j] = '\0';
+
+       return 1;
+
+fail:
+       return 0;
+}
+/* }}} */
+
+/* {{{ php_base64_encode */
+
+#if ZEND_INTRIN_AVX2_NATIVE
+# undef ZEND_INTRIN_SSSE3_NATIVE
+# undef ZEND_INTRIN_SSSE3_RESOLVER
+# undef ZEND_INTRIN_SSSE3_FUNC_PROTO
+# undef ZEND_INTRIN_SSSE3_FUNC_PTR
+#elif ZEND_INTRIN_AVX2_FUNC_PROTO && ZEND_INTRIN_SSSE3_NATIVE
+# undef ZEND_INTRIN_SSSE3_NATIVE
+# define ZEND_INTRIN_SSSE3_RESOLVER 1
+# define ZEND_INTRIN_SSSE3_FUNC_PROTO 1
+# undef ZEND_INTRIN_SSSE3_FUNC_DECL
+# ifdef HAVE_FUNC_ATTRIBUTE_TARGET
+#  define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3")))
+# else
+#  define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func
+# endif
+#elif ZEND_INTRIN_AVX2_FUNC_PTR && ZEND_INTRIN_SSSE3_NATIVE
+# undef ZEND_INTRIN_SSSE3_NATIVE
+# undef ZEND_INTRIN_SSSE3_RESOLVER
+# define ZEND_INTRIN_SSSE3_RESOLVER 1
+# define ZEND_INTRIN_SSSE3_FUNC_PTR 1
+# undef ZEND_INTRIN_SSSE3_FUNC_DECL
+# ifdef HAVE_FUNC_ATTRIBUTE_TARGET
+#  define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3")))
+# else
+#  define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func
+# endif
+#endif
+
+#if ZEND_INTRIN_AVX2_NATIVE
+# include <immintrin.h>
+#elif ZEND_INTRIN_SSSE3_NATIVE
+# include <tmmintrin.h>
+#elif (ZEND_INTRIN_SSSE3_RESOLVER || ZEND_INTRIN_AVX2_RESOLVER)
+# if ZEND_INTRIN_AVX2_RESOLVER
+#  include <immintrin.h>
+# else
+#  include <tmmintrin.h>
+# endif /* ZEND_INTRIN_AVX2_RESOLVER */
+# include "Zend/zend_cpuinfo.h"
+
+# if ZEND_INTRIN_AVX2_RESOLVER
+ZEND_INTRIN_AVX2_FUNC_DECL(zend_string *php_base64_encode_avx2(const unsigned char *str, size_t length));
+ZEND_INTRIN_AVX2_FUNC_DECL(zend_string *php_base64_decode_ex_avx2(const unsigned char *str, size_t length, zend_bool strict));
+# endif
+
+# if ZEND_INTRIN_SSSE3_RESOLVER
+ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length));
+ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, zend_bool strict));
+# endif
+
+zend_string *php_base64_encode_default(const unsigned char *str, size_t length);
+zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, zend_bool strict);
+
+# if (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO)
+PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length) __attribute__((ifunc("resolve_base64_encode")));
+PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict) __attribute__((ifunc("resolve_base64_decode")));
+
+static void *resolve_base64_encode() {
+# if ZEND_INTRIN_AVX2_FUNC_PROTO
+       if (zend_cpu_supports_avx2()) {
+               return php_base64_encode_avx2;
+       } else
+# endif
+       if (zend_cpu_supports_ssse3()) {
+               return php_base64_encode_ssse3;
+       }
+       return  php_base64_encode_default;
+}
+
+static void *resolve_base64_decode() {
+# if ZEND_INTRIN_AVX2_FUNC_PROTO
+       if (zend_cpu_supports_avx2()) {
+               return php_base64_decode_ex_avx2;
+       } else
+# endif
+       if (zend_cpu_supports_ssse3()) {
+               return php_base64_decode_ex_ssse3;
+       }
+       return  php_base64_decode_ex_default;
+}
+# else /* (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) */
+
+PHPAPI zend_string *(*php_base64_encode)(const unsigned char *str, size_t length) = NULL;
+PHPAPI zend_string *(*php_base64_decode_ex)(const unsigned char *str, size_t length, zend_bool strict) = NULL;
+
+PHP_MINIT_FUNCTION(base64_intrin)
+{
+# if ZEND_INTRIN_AVX2_FUNC_PTR
+       if (zend_cpu_supports_avx2()) {
+               php_base64_encode = php_base64_encode_avx2;
+               php_base64_decode_ex = php_base64_decode_ex_avx2;
+       } else
+# endif
+       if (zend_cpu_supports_ssse3()) {
+               php_base64_encode = php_base64_encode_ssse3;
+               php_base64_decode_ex = php_base64_decode_ex_ssse3;
+       } else {
+               php_base64_encode = php_base64_encode_default;
+               php_base64_decode_ex = php_base64_decode_ex_default;
+       }
+       return SUCCESS;
+}
+# endif /* (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) */
+#endif /* ZEND_INTRIN_AVX2_NATIVE */
+
+#if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER
+# if ZEND_INTRIN_AVX2_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
+static __m256i php_base64_encode_avx2_reshuffle(__m256i in) __attribute__((target("avx2")));
+static __m256i php_base64_encode_avx2_translate(__m256i in) __attribute__((target("avx2")));
+# endif
+static __m256i php_base64_encode_avx2_reshuffle(__m256i in)
+{
+       /* This one works with shifted (4 bytes) input in order to
+        * be able to work efficiently in the 2 128-bit lanes */
+       __m256i t0, t1, t2, t3;
+
+       /* input, bytes MSB to LSB:
+        * 0 0 0 0 x w v u t s r q p o n m
+        * l k j i h g f e d c b a 0 0 0 0 */
+       in = _mm256_shuffle_epi8(in, _mm256_set_epi8(
+               10, 11,  9, 10,
+                7,  8,  6,  7,
+                4,  5,  3,  4,
+                1,  2,  0,  1,
+
+               14, 15, 13, 14,
+               11, 12, 10, 11,
+                8,  9,  7,  8,
+                5,  6,  4,  5));
+
+       t0 = _mm256_and_si256(in, _mm256_set1_epi32(0x0fc0fc00));
+
+       t1 = _mm256_mulhi_epu16(t0, _mm256_set1_epi32(0x04000040));
+
+       t2 = _mm256_and_si256(in, _mm256_set1_epi32(0x003f03f0));
+
+       t3 = _mm256_mullo_epi16(t2, _mm256_set1_epi32(0x01000010));
+
+       return _mm256_or_si256(t1, t3);
+       /* 00xxxxxx 00wwwwXX 00vvWWWW 00VVVVVV
+        * 00uuuuuu 00ttttUU 00ssTTTT 00SSSSSS
+        * 00rrrrrr 00qqqqRR 00ppQQQQ 00PPPPPP
+        * 00oooooo 00nnnnOO 00mmNNNN 00MMMMMM
+        * 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
+        * 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
+        * 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
+        * 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA */
+}
+
+static __m256i php_base64_encode_avx2_translate(__m256i in)
+{
+       __m256i lut, indices, mask;
+
+       lut = _mm256_setr_epi8(
+                       65, 71, -4, -4, -4, -4, -4, -4,
+                       -4, -4, -4, -4, -19, -16, 0, 0,
+                       65, 71, -4, -4, -4, -4, -4, -4,
+                       -4, -4, -4, -4, -19, -16, 0, 0);
+
+       indices = _mm256_subs_epu8(in, _mm256_set1_epi8(51));
+
+       mask = _mm256_cmpgt_epi8(in, _mm256_set1_epi8(25));
+
+       indices = _mm256_sub_epi8(indices, mask);
+
+       return _mm256_add_epi8(in, _mm256_shuffle_epi8(lut, indices));
+
+}
+#endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER */
+
+#if ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER
+# if ZEND_INTRIN_SSSE3_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
+static __m128i php_base64_encode_ssse3_reshuffle(__m128i in) __attribute__((target("ssse3")));
+static __m128i php_base64_encode_ssse3_translate(__m128i in) __attribute__((target("ssse3")));
+# endif
+
+static __m128i php_base64_encode_ssse3_reshuffle(__m128i in)
+{
+       __m128i t0, t1, t2, t3;
+
+       /* input, bytes MSB to LSB:
+        * 0 0 0 0 l k j i h g f e d c b a */
+       in = _mm_shuffle_epi8(in, _mm_set_epi8(
+                               10, 11,  9, 10,
+                               7,  8,  6,  7,
+                               4,  5,  3,  4,
+                               1,  2,  0,  1));
+
+       t0 = _mm_and_si128(in, _mm_set1_epi32(0x0fc0fc00));
+
+       t1 = _mm_mulhi_epu16(t0, _mm_set1_epi32(0x04000040));
+
+       t2 = _mm_and_si128(in, _mm_set1_epi32(0x003f03f0));
+
+       t3 = _mm_mullo_epi16(t2, _mm_set1_epi32(0x01000010));
+
+       /* output (upper case are MSB, lower case are LSB):
+        * 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
+        * 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
+        * 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
+        * 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA */
+       return _mm_or_si128(t1, t3);
+}
+
+static __m128i php_base64_encode_ssse3_translate(__m128i in)
+{
+       __m128i mask, indices;
+       __m128i lut = _mm_setr_epi8(
+                       65,  71, -4, -4,
+                       -4,  -4, -4, -4,
+                       -4,  -4, -4, -4,
+                       -19, -16,  0,  0
+                       );
+
+       /* Translate values 0..63 to the Base64 alphabet. There are five sets:
+        * #  From      To         Abs    Index  Characters
+        * 0  [0..25]   [65..90]   +65        0  ABCDEFGHIJKLMNOPQRSTUVWXYZ
+        * 1  [26..51]  [97..122]  +71        1  abcdefghijklmnopqrstuvwxyz
+        * 2  [52..61]  [48..57]    -4  [2..11]  0123456789
+        * 3  [62]      [43]       -19       12  +
+        * 4  [63]      [47]       -16       13  / */
+
+       /* Create LUT indices from input:
+        * the index for range #0 is right, others are 1 less than expected: */
+       indices = _mm_subs_epu8(in, _mm_set1_epi8(51));
+
+       /* mask is 0xFF (-1) for range #[1..4] and 0x00 for range #0: */
+       mask = _mm_cmpgt_epi8(in, _mm_set1_epi8(25));
+
+       /* subtract -1 (i.e. add 1) for ranges #[1..4]; all indices are now correct: */
+       indices = _mm_sub_epi8(indices, mask);
+
+       /* Add offsets to input values: */
+       return _mm_add_epi8(in, _mm_shuffle_epi8(lut, indices));
+}
+#endif /* ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */
+
+#if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER
+# if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_SSSE3_NATIVE
+PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length)
+# elif ZEND_INTRIN_AVX2_RESOLVER
+zend_string *php_base64_encode_avx2(const unsigned char *str, size_t length)
+# elif ZEND_INTRIN_SSSE3_RESOLVER
+zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length)
+# endif
+{
+       const unsigned char *c = str;
+       unsigned char *o;
+       zend_string *result;
+
+       result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
+       o = (unsigned char *)ZSTR_VAL(result);
+# if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER
+       if (length > 31) {
+               __m256i s = _mm256_loadu_si256((__m256i *)c);
+
+               s = _mm256_permutevar8x32_epi32(s, _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6));
+
+               for (;;) {
+                       s = php_base64_encode_avx2_reshuffle(s);
+
+                       s = php_base64_encode_avx2_translate(s);
+
+                       _mm256_storeu_si256((__m256i *)o, s);
+                       c += 24;
+                       o += 32;
+                       length -= 24;
+                       if (length < 28) {
+                               break;
+                       }
+                       s = _mm256_loadu_si256((__m256i *)(c - 4));
+               }
+       }
+# else
+       while (length > 15) {
+               __m128i s = _mm_loadu_si128((__m128i *)c);
+
+               s = php_base64_encode_ssse3_reshuffle(s);
+
+               s = php_base64_encode_ssse3_translate(s);
+
+               _mm_storeu_si128((__m128i *)o, s);
+               c += 12;
+               o += 16;
+               length -= 12;
+       }
+# endif
+
+       o = php_base64_encode_impl(c, length, o);
+
+       ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result));
 
        return result;
 }
+
+# if ZEND_INTRIN_SSSE3_RESOLVER && ZEND_INTRIN_AVX2_RESOLVER
+zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length)
+{
+       const unsigned char *c = str;
+       unsigned char *o;
+       zend_string *result;
+
+       result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
+       o = (unsigned char *)ZSTR_VAL(result);
+       while (length > 15) {
+               __m128i s = _mm_loadu_si128((__m128i *)c);
+
+               s = php_base64_encode_ssse3_reshuffle(s);
+
+               s = php_base64_encode_ssse3_translate(s);
+
+               _mm_storeu_si128((__m128i *)o, s);
+               c += 12;
+               o += 16;
+               length -= 12;
+       }
+
+       o = php_base64_encode_impl(c, length, o);
+
+       ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result));
+
+       return result;
+}
+# endif
+#endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */
+
 /* }}} */
 
-/* {{{ */
+/* {{{ php_base64_decode_ex */
 /* generate reverse table (do not set index 0 to 64)
 static unsigned short base64_reverse_table[256];
 #define rt base64_reverse_table
@@ -125,78 +517,300 @@ void php_base64_init(void)
        efree(s);
 }
 */
-/* }}} */
 
-PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict) /* {{{ */
+#if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER
+# if ZEND_INTRIN_AVX2_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
+static __m256i php_base64_decode_avx2_reshuffle(__m256i in) __attribute__((target("avx2")));
+# endif
+
+static __m256i php_base64_decode_avx2_reshuffle(__m256i in) /* bit-packing step of the AVX2 decoder; presumably every byte of `in` holds one 6-bit value - confirm against the caller */
 {
-       const unsigned char *current = str;
-       int ch, i = 0, j = 0, padding = 0;
+       __m256i merge_ab_and_bc, out;
+
+       merge_ab_and_bc = _mm256_maddubs_epi16(in, _mm256_set1_epi32(0x01400140)); /* per byte pair: first * 0x40 + second */
+
+       out = _mm256_madd_epi16(merge_ab_and_bc, _mm256_set1_epi32(0x00011000)); /* per 16-bit pair: first * 0x1000 + second */
+
+       out = _mm256_shuffle_epi8(out, _mm256_setr_epi8( /* same index pattern as the SSSE3 reshuffle, listed once per 128-bit half */
+                               2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1,
+                               2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1));
+
+       return _mm256_permutevar8x32_epi32(out, _mm256_setr_epi32(0, 1, 2, 4, 5, 6, -1, -1)); /* moves dwords 0-2 and 4-6 into the low six dwords; the caller advances its output by 24 bytes only */
+}
+#endif
+
+#if ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER
+# if ZEND_INTRIN_SSSE3_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
+static __m128i php_base64_decode_ssse3_reshuffle(__m128i in) __attribute__((target("ssse3")));
+# endif
+
+static __m128i php_base64_decode_ssse3_reshuffle(__m128i in) /* bit-packing step of the SSSE3 decoder: per the layouts below, 16 six-bit values become 12 data bytes followed by 4 zero bytes */
+{
+       __m128i merge_ab_and_bc, out;
+
+       merge_ab_and_bc = _mm_maddubs_epi16(in, _mm_set1_epi32(0x01400140)); /* per byte pair: first * 0x40 + second */
+       /* 0000kkkk LLllllll 0000JJJJ JJjjKKKK
+        * 0000hhhh IIiiiiii 0000GGGG GGggHHHH
+        * 0000eeee FFffffff 0000DDDD DDddEEEE
+        * 0000bbbb CCcccccc 0000AAAA AAaaBBBB */
+
+       out = _mm_madd_epi16(merge_ab_and_bc, _mm_set1_epi32(0x00011000)); /* per 16-bit pair: first * 0x1000 + second */
+       /* 00000000 JJJJJJjj KKKKkkkk LLllllll
+        * 00000000 GGGGGGgg HHHHhhhh IIiiiiii
+        * 00000000 DDDDDDdd EEEEeeee FFffffff
+        * 00000000 AAAAAAaa BBBBbbbb CCcccccc */
+
+       return  _mm_shuffle_epi8(out, _mm_setr_epi8( /* reverses the 3 data bytes of each dword and packs them; the four -1 indices produce zero bytes */
+                2,  1,  0,
+                6,  5,  4,
+               10,  9,  8,
+               14, 13, 12,
+               -1, -1, -1, -1));
+       /* 00000000 00000000 00000000 00000000
+        * LLllllll KKKKkkkk JJJJJJjj IIiiiiii
+        * HHHHhhhh GGGGGGgg FFffffff EEEEeeee
+        * DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa */
+}
+#endif
+
+#if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER
+# if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_SSSE3_NATIVE /* native build: this body is the exported php_base64_decode_ex */
+PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict)
+# elif ZEND_INTRIN_AVX2_RESOLVER /* AVX2 resolver build: the same body is compiled under the _avx2 name */
+zend_string *php_base64_decode_ex_avx2(const unsigned char *str, size_t length, zend_bool strict)
+# else /* otherwise the same body is compiled under the _ssse3 name */
+zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, zend_bool strict)
+# endif
+{ /* SIMD base64 decode: a vector loop first, then php_base64_decode_impl for whatever input is left; returns NULL when that call reports failure */
+       const unsigned char *c = str; /* input cursor */
+       unsigned char *o; /* output cursor used by the vector stores */
+       size_t outl = 0; /* output bytes produced so far; handed on to php_base64_decode_impl */
         zend_string *result;
 
         result = zend_string_alloc(length, 0);
+       o = (unsigned char *)ZSTR_VAL(result);
 
-       /* run through the whole string, converting as we go */
-       while (length-- > 0) {
-               ch = *current++;
-               if (ch == base64_pad) {
-                       padding++;
-                       continue;
-               }
+       /* See: "Faster Base64 Encoding and Decoding using AVX2 Instructions"
+       * https://arxiv.org/pdf/1704.00605.pdf */
+# if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER
+       while (length > 31 + 11 + 2) { /* vector loop runs only while more than 44 input bytes remain */
+               __m256i lut_lo, lut_hi, lut_roll;
+               __m256i hi_nibbles, lo_nibbles, hi, lo;
+               __m256i str = _mm256_loadu_si256((__m256i *)c); /* note: this local `str` shadows the function parameter of the same name */
 
-               ch = base64_reverse_table[ch];
-               if (!strict) {
-                       /* skip unknown characters and whitespace */
-                       if (ch < 0) {
-                               continue;
-                       }
+               lut_lo = _mm256_setr_epi8( /* looked up with lo_nibbles below */
+                               0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
+                               0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A,
+                               0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
+                               0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);
+
+               lut_hi = _mm256_setr_epi8( /* looked up with hi_nibbles below */
+                               0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
+                               0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
+                               0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
+                               0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);
+
+               lut_roll = _mm256_setr_epi8( /* per-byte offsets that get added to the input further down */
+                               0,  16,  19,   4, -65, -65, -71, -71,
+                               0,   0,   0,   0,   0,   0,   0,   0,
+                               0,  16,  19,   4, -65, -65, -71, -71,
+                               0,   0,   0,   0,   0,   0,   0,   0);
+
+               hi_nibbles  = _mm256_and_si256(_mm256_srli_epi32(str, 4), _mm256_set1_epi8(0x2f)); /* input shifted right by 4 (as 32-bit lanes), then masked with 0x2f */
+               lo_nibbles  = _mm256_and_si256(str, _mm256_set1_epi8(0x2f)); /* input masked with 0x2f */
+               hi          = _mm256_shuffle_epi8(lut_hi, hi_nibbles);
+               lo          = _mm256_shuffle_epi8(lut_lo, lo_nibbles);
+
+               if (!_mm256_testz_si256(lo, hi)) { /* (lo & hi) has a bit set somewhere: stop the vector loop and let php_base64_decode_impl handle the rest */
+                       break;
                 } else {
-                       /* skip whitespace */
-                       if (ch == -1) {
-                               continue;
-                       }
-                       /* fail on bad characters or if any data follows padding */
-                       if (ch == -2 || padding) {
-                               goto fail;
-                       }
+                       __m256i eq_2f, roll;
+                       eq_2f = _mm256_cmpeq_epi8(str, _mm256_set1_epi8(0x2f)); /* all-ones (-1) in every byte equal to 0x2f */
+                       roll  = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2f, hi_nibbles)); /* lut_roll index = hi_nibbles, one lower where the byte is 0x2f */
+
+
+                       str = _mm256_add_epi8(str, roll); /* apply the selected offset to every input byte */
+
+                       str = php_base64_decode_avx2_reshuffle(str);
+
+                       _mm256_storeu_si256((__m256i *)o, str); /* 32-byte store, but only 24 bytes are counted (see the increments below) */
+
+                       c += 32;
+                       o += 24;
+                       outl += 24;
+                       length -= 32;
                 }
+       }
+# else
+       while (length > 15 + 6 + 2) { /* vector loop runs only while more than 23 input bytes remain */
+               __m128i lut_lo, lut_hi, lut_roll;
+               __m128i hi_nibbles, lo_nibbles, hi, lo;
 
-               switch(i % 4) {
-               case 0:
-                       ZSTR_VAL(result)[j] = ch << 2;
-                       break;
-               case 1:
-                       ZSTR_VAL(result)[j++] |= ch >> 4;
-                       ZSTR_VAL(result)[j] = (ch & 0x0f) << 4;
-                       break;
-               case 2:
-                       ZSTR_VAL(result)[j++] |= ch >>2;
-                       ZSTR_VAL(result)[j] = (ch & 0x03) << 6;
-                       break;
-               case 3:
-                       ZSTR_VAL(result)[j++] |= ch;
+               __m128i s = _mm_loadu_si128((__m128i *)c); /* unaligned 16-byte load */
+
+               lut_lo = _mm_setr_epi8( /* looked up with lo_nibbles below */
+                               0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
+                               0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);
+
+               lut_hi = _mm_setr_epi8( /* looked up with hi_nibbles below */
+                               0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
+                               0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);
+
+               lut_roll = _mm_setr_epi8( /* per-byte offsets that get added to the input further down */
+                               0,  16,  19,   4, -65, -65, -71, -71,
+                               0,   0,   0,   0,   0,   0,   0,   0);
+
+               hi_nibbles  = _mm_and_si128(_mm_srli_epi32(s, 4), _mm_set1_epi8(0x2f)); /* input shifted right by 4 (as 32-bit lanes), then masked with 0x2f */
+               lo_nibbles  = _mm_and_si128(s, _mm_set1_epi8(0x2f)); /* input masked with 0x2f */
+               hi          = _mm_shuffle_epi8(lut_hi, hi_nibbles);
+               lo          = _mm_shuffle_epi8(lut_lo, lo_nibbles);
+
+               /* Check for invalid input: if any "and" values from lo and hi are not zero,
+                  fall back on bytewise code to do error checking and reporting: */
+               if (UNEXPECTED(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_and_si128(lo, hi), _mm_set1_epi8(0))) != 0)) {
                         break;
+               } else {
+                       __m128i eq_2f, roll;
+
+                       eq_2f = _mm_cmpeq_epi8(s, _mm_set1_epi8(0x2f)); /* all-ones (-1) in every byte equal to 0x2f */
+                       roll = _mm_shuffle_epi8(lut_roll, _mm_add_epi8(eq_2f, hi_nibbles)); /* lut_roll index = hi_nibbles, one lower where the byte is 0x2f */
+
+                       s = _mm_add_epi8(s, roll); /* apply the selected offset to every input byte */
+
+                       s = php_base64_decode_ssse3_reshuffle(s);
+
+                       _mm_storeu_si128((__m128i *)o, s); /* 16-byte store, but only 12 bytes are counted (see the increments below) */
+
+                       c += 16;
+                       o += 12;
+                       outl += 12;
+                       length -= 16;
                 }
-               i++;
         }
-       /* fail if the input is truncated (only one char in last group) */
-       if (strict && i % 4 == 1) {
-               goto fail;
+# endif
+
+       if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) { /* remaining input; receives the buffer start plus the running outl rather than the cursor o */
+               zend_string_free(result); /* failure reported: release the buffer ... */
+               return NULL; /* ... and signal the error to the caller */
         }
-       /* fail if the padding length is wrong (not VV==, VVV=), but accept zero padding
-        * RFC 4648: "In some circumstances, the use of padding [--] is not required" */
-       if (strict && padding && (padding > 2 || (i + padding) % 4 != 0)) {
-               goto fail;
+
+       ZSTR_LEN(result) = outl; /* outl was updated through the pointer passed above */
+
+       return result;
+}
+
+# if ZEND_INTRIN_SSSE3_RESOLVER && ZEND_INTRIN_AVX2_RESOLVER
+zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, zend_bool strict) /* SSSE3 decoder compiled when both the SSSE3 and the AVX2 resolver are enabled */
+{
+       const unsigned char *c = str; /* input cursor */
+       unsigned char *o; /* output cursor used by the vector stores */
+       size_t outl = 0; /* output bytes produced so far; handed on to php_base64_decode_impl */
+       zend_string *result;
+
+       result = zend_string_alloc(length, 0); /* allocated with the input length */
+       o = (unsigned char *)ZSTR_VAL(result);
+
+       while (length > 15 + 2) { /* more than 17 input bytes must remain; NOTE(review): the other SSSE3 decode loop in this file uses 15 + 6 + 2 - confirm the different bound is intended */
+               __m128i lut_lo, lut_hi, lut_roll;
+               __m128i hi_nibbles, lo_nibbles, hi, lo;
+
+               __m128i s = _mm_loadu_si128((__m128i *)c); /* unaligned 16-byte load */
+
+               lut_lo = _mm_setr_epi8( /* looked up with lo_nibbles below */
+                               0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
+                               0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);
+
+               lut_hi = _mm_setr_epi8( /* looked up with hi_nibbles below */
+                               0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
+                               0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);
+
+               lut_roll = _mm_setr_epi8( /* per-byte offsets that get added to the input further down */
+                               0,  16,  19,   4, -65, -65, -71, -71,
+                               0,   0,   0,   0,   0,   0,   0,   0);
+
+               hi_nibbles  = _mm_and_si128(_mm_srli_epi32(s, 4), _mm_set1_epi8(0x2f)); /* input shifted right by 4 (as 32-bit lanes), then masked with 0x2f */
+               lo_nibbles  = _mm_and_si128(s, _mm_set1_epi8(0x2f)); /* input masked with 0x2f */
+               hi          = _mm_shuffle_epi8(lut_hi, hi_nibbles);
+               lo          = _mm_shuffle_epi8(lut_lo, lo_nibbles);
+
+               /* Check for invalid input: if any "and" values from lo and hi are not zero,
+                  fall back on bytewise code to do error checking and reporting: */
+               if (UNEXPECTED(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_and_si128(lo, hi), _mm_set1_epi8(0))) != 0)) {
+                       break;
+               } else {
+                       __m128i eq_2f, roll;
+
+                       eq_2f = _mm_cmpeq_epi8(s, _mm_set1_epi8(0x2f)); /* all-ones (-1) in every byte equal to 0x2f */
+                       roll = _mm_shuffle_epi8(lut_roll, _mm_add_epi8(eq_2f, hi_nibbles)); /* lut_roll index = hi_nibbles, one lower where the byte is 0x2f */
+
+                       s = _mm_add_epi8(s, roll); /* apply the selected offset to every input byte */
+
+                       s = php_base64_decode_ssse3_reshuffle(s);
+
+                       _mm_storeu_si128((__m128i *)o, s); /* 16-byte store, but only 12 bytes are counted (see the increments below) */
+
+                       c += 16;
+                       o += 12;
+                       outl += 12;
+                       length -= 16;
+               }
+       }
+
+       if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) { /* remaining input; receives the buffer start plus the running outl rather than the cursor o */
+               zend_string_free(result); /* failure reported: release the buffer ... */
+               return NULL; /* ... and signal the error to the caller */
         }
 
-       ZSTR_LEN(result) = j;
-       ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0';
+       ZSTR_LEN(result) = outl; /* outl was updated through the pointer passed above */
 
         return result;
+}
+# endif
+#endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */
 
-fail:
-       zend_string_free(result);
-       return NULL;
+#if !ZEND_INTRIN_AVX2_NATIVE && !ZEND_INTRIN_SSSE3_NATIVE /* compiled only when neither SIMD variant is built natively */
+#if ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_RESOLVER /* with a resolver this body gets the _default name, otherwise it is the exported php_base64_encode */
+zend_string *php_base64_encode_default(const unsigned char *str, size_t length)
+#else
+PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length)
+#endif
+{ /* base64 encoder without a vector loop: the whole input goes through php_base64_encode_impl */
+       unsigned char *p; /* output cursor */
+       zend_string *result;
+
+       result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0); /* requests 4 bytes per 3-byte input group, group count rounded up */
+       p = (unsigned char *)ZSTR_VAL(result);
+
+       p = php_base64_encode_impl(str, length, p); /* return value is used as the end of the written output */
+
+       ZSTR_LEN(result) = (p - (unsigned char *)ZSTR_VAL(result)); /* final length = distance the output cursor advanced */
+
+       return result;
+}
+#endif
+
+#if !ZEND_INTRIN_AVX2_NATIVE && !ZEND_INTRIN_SSSE3_NATIVE /* compiled only when neither SIMD variant is built natively */
+#if ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_RESOLVER /* with a resolver this body gets the _default name, otherwise it is the exported php_base64_decode_ex */
+zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, zend_bool strict)
+#else
+PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict)
+#endif
+{ /* base64 decoder without a vector loop: the whole input goes through php_base64_decode_impl; returns NULL when that call reports failure */
+       zend_string *result;
+       size_t outl = 0; /* filled in by php_base64_decode_impl through the pointer below */
+
+       result = zend_string_alloc(length, 0); /* allocated with the input length */
+
+       if (!php_base64_decode_impl(str, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
+               zend_string_free(result); /* failure reported: release the buffer ... */
+               return NULL; /* ... and signal the error to the caller */
+       }
+
+       ZSTR_LEN(result) = outl;
+
+       return result;
 }
+#endif
 /* }}} */
 
 /* {{{ proto string base64_encode(string str)
index f380d3c888f9ca9d5c5c846972649af9bf193fbc..c9fe26e985535a4840956f667a5ede55e9bba461 100644 (file)
 #ifndef BASE64_H
 #define BASE64_H
 
+/*
+ * SSSE3 and AVX2 implementation are based on https://github.com/aklomp/base64
+ * which is copyrighted to:
+ *
+ * Copyright (c) 2005-2007, Nick Galbreath
+ * Copyright (c) 2013-2017, Alfred Klomp
+ * Copyright (c) 2015-2017, Wojciech Mula
+ * Copyright (c) 2016-2017, Matthieu Darbois
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in the
+ *   documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
 PHP_FUNCTION(base64_decode);
 PHP_FUNCTION(base64_encode);
 
+#if (ZEND_INTRIN_AVX2_FUNC_PTR || ZEND_INTRIN_SSSE3_FUNC_PTR) && !ZEND_INTRIN_AVX2_NATIVE
+PHP_MINIT_FUNCTION(base64_intrin);
+PHPAPI extern zend_string *(*php_base64_encode)(const unsigned char *, size_t);
+PHPAPI extern zend_string *(*php_base64_decode_ex)(const unsigned char *, size_t, zend_bool);
+#else
 PHPAPI extern zend_string *php_base64_encode(const unsigned char *, size_t);
+PHPAPI extern zend_string *php_base64_decode_ex(const unsigned char *, size_t, zend_bool);
+#endif
+
 static inline zend_string *php_base64_encode_str(const zend_string *str) { /* convenience wrapper: passes the value and length of `str` to php_base64_encode */
         return php_base64_encode((const unsigned char*)(ZSTR_VAL(str)), ZSTR_LEN(str));
 }
 
-PHPAPI extern zend_string *php_base64_decode_ex(const unsigned char *, size_t, zend_bool);
-
 static inline zend_string *php_base64_decode(const unsigned char *str, size_t len) { /* convenience wrapper around php_base64_decode_ex */
         return php_base64_decode_ex(str, len, 0); /* third argument (strict) fixed to 0 */
 }
index b322caa65e3790614b9e0804894ff1c479df1fa5..4404ad1df006eb82f9a8df0e1d8f0dd11eb4bccb 100644 (file)
@@ -3692,6 +3692,10 @@ PHP_MINIT_FUNCTION(basic) /* {{{ */
        BASIC_MINIT_SUBMODULE(string_intrin)
 #endif
 
+#if ZEND_INTRIN_AVX2_FUNC_PTR || ZEND_INTRIN_SSSE3_FUNC_PTR
+       BASIC_MINIT_SUBMODULE(base64_intrin)
+#endif
+
        BASIC_MINIT_SUBMODULE(crypt)
        BASIC_MINIT_SUBMODULE(lcg)
 
index 73bbb2442009e212311d8a66874e3b8df747c566..0023c4ba3b8f860f73766bf9ee3421a74d49eae0 100644 (file)
@@ -3873,7 +3873,7 @@ zend_string *php_addslashes_default(zend_string *str, int should_free);
 PHPAPI zend_string *php_addslashes(zend_string *str, int should_free) __attribute__((ifunc("resolve_addslashes")));
 
 static void *resolve_addslashes() {
-       if (zend_cpu_support_sse42()) {
+       if (zend_cpu_supports_sse42()) {
                return php_addslashes_sse42;
        }
        return  php_addslashes_default;