From: Xinchen Hui Date: Mon, 12 Feb 2018 12:46:17 +0000 (+0800) Subject: Optimized base64_encode/decode with SIMD instructions X-Git-Tag: php-7.3.0alpha1~448 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=cf0fae5e975f542453e80d9b082bceee98699db5;p=php Optimized base64_encode/decode with SIMD instructions --- diff --git a/UPGRADING.INTERNALS b/UPGRADING.INTERNALS index ae1062675f..092df3d2d3 100644 --- a/UPGRADING.INTERNALS +++ b/UPGRADING.INTERNALS @@ -104,6 +104,9 @@ PHP 7.3 INTERNALS UPGRADE NOTES point in the past. m. zend_cpu_supports() determines if a feature is supported by current cpu. + Also serial inline zend_cpu_supports_xxx() are added, which is designed for + ifunc resolver function, as resolver function should not depend on any + external function. n. IS_TYPE_COPYABLE flag is removed. IS_STRING zvals didn't need to be duplication by zval_copy_ctor(), ZVAL_DUP() and SEPARATE_ZVAL*() macros. diff --git a/Zend/zend_cpuinfo.h b/Zend/zend_cpuinfo.h index d0d3a936e1..f0c298db68 100644 --- a/Zend/zend_cpuinfo.h +++ b/Zend/zend_cpuinfo.h @@ -106,42 +106,42 @@ ZEND_API int zend_cpu_supports(zend_cpu_feature feature); * before all PLT symbols are resloved. 
in other words, * resolver functions should not depends any external * functions */ -static zend_always_inline int zend_cpu_support_sse2() { +static zend_always_inline int zend_cpu_supports_sse2() { #if PHP_HAVE_BUILTIN_CPU_INIT __builtin_cpu_init(); #endif return __builtin_cpu_supports("sse2"); } -static zend_always_inline int zend_cpu_support_sse3() { +static zend_always_inline int zend_cpu_supports_ssse3() { #if PHP_HAVE_BUILTIN_CPU_INIT __builtin_cpu_init(); #endif - return __builtin_cpu_supports("sse3"); + return __builtin_cpu_supports("ssse3"); } -static zend_always_inline int zend_cpu_support_sse41() { +static zend_always_inline int zend_cpu_supports_sse41() { #if PHP_HAVE_BUILTIN_CPU_INIT __builtin_cpu_init(); #endif return __builtin_cpu_supports("sse4.1"); } -static zend_always_inline int zend_cpu_support_sse42() { +static zend_always_inline int zend_cpu_supports_sse42() { #if PHP_HAVE_BUILTIN_CPU_INIT __builtin_cpu_init(); #endif return __builtin_cpu_supports("sse4.2"); } -static zend_always_inline int zend_cpu_support_avx() { +static zend_always_inline int zend_cpu_supports_avx() { #if PHP_HAVE_BUILTIN_CPU_INIT __builtin_cpu_init(); #endif return __builtin_cpu_supports("avx"); } -static zend_always_inline int zend_cpu_support_avx2() { +static zend_always_inline int zend_cpu_supports_avx2() { #if PHP_HAVE_BUILTIN_CPU_INIT __builtin_cpu_init(); #endif @@ -149,29 +149,28 @@ static zend_always_inline int zend_cpu_support_avx2() { } #else -static zend_always_inline int zend_cpu_support_sse2() { +static zend_always_inline int zend_cpu_supports_sse2() { return zend_cpu_supports(ZEND_CPU_FEATURE_SSE2); } -static zend_always_inline int zend_cpu_support_sse3() { - return zend_cpu_supports(ZEND_CPU_FEATURE_SSE3); +static zend_always_inline int zend_cpu_supports_ssse3() { + return zend_cpu_supports(ZEND_CPU_FEATURE_SSSE3); } -static zend_always_inline int zend_cpu_support_sse41() { +static zend_always_inline int zend_cpu_supports_sse41() { return 
zend_cpu_supports(ZEND_CPU_FEATURE_SSE41); } -static zend_always_inline int zend_cpu_support_sse42() { +static zend_always_inline int zend_cpu_supports_sse42() { return zend_cpu_supports(ZEND_CPU_FEATURE_SSE42); } -static zend_always_inline int zend_cpu_support_avx() { +static zend_always_inline int zend_cpu_supports_avx() { return zend_cpu_supports(ZEND_CPU_FEATURE_AVX); } -static zend_always_inline int zend_cpu_support_avx2() { - /* TODO */ - return 0; +static zend_always_inline int zend_cpu_supports_avx2() { + return zend_cpu_supports(ZEND_CPU_FEATURE_AVX2); } #endif diff --git a/Zend/zend_portability.h b/Zend/zend_portability.h index bd3e23edfe..abf7dcc5a4 100644 --- a/Zend/zend_portability.h +++ b/Zend/zend_portability.h @@ -520,10 +520,48 @@ static zend_always_inline double _zend_get_nan(void) /* {{{ */ # define ZEND_INTRIN_HAVE_IFUNC_TARGET 1 #endif +#if (defined(__i386__) || defined(__x86_64__)) +# if PHP_HAVE_SSSE3_INSTRUCTIONS && defined(HAVE_TMMINTRIN_H) +# define PHP_HAVE_SSSE3 +# endif + +# if PHP_HAVE_SSE4_2_INSTRUCTIONS && defined(HAVE_NMMINTRIN_H) +# define PHP_HAVE_SSE4_2 +# endif + +# if PHP_HAVE_AVX2_INSTRUCTIONS && defined(HAVE_IMMINTRIN_H) +# define PHP_HAVE_AVX2 +# endif +#endif + +#ifdef __SSSE3__ +/* Instructions compiled directly. */ +# define ZEND_INTRIN_SSSE3_NATIVE 1 +#elif (defined(HAVE_FUNC_ATTRIBUTE_TARGET) && defined(PHP_HAVE_SSSE3)) || defined(ZEND_WIN32) +/* Function resolved by ifunc or MINIT. 
*/ +# define ZEND_INTRIN_SSSE3_RESOLVER 1 +#endif + +#if ZEND_INTRIN_HAVE_IFUNC_TARGET && (ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER) +# define ZEND_INTRIN_SSSE3_FUNC_PROTO 1 +#elif ZEND_INTRIN_SSSE3_RESOLVER +# define ZEND_INTRIN_SSSE3_FUNC_PTR 1 +#endif + +#if ZEND_INTRIN_SSSE3_RESOLVER +# if defined(HAVE_FUNC_ATTRIBUTE_TARGET) +# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3"))) +# else +# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) func +# endif +#else +# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) +#endif + #ifdef __SSE4_2__ /* Instructions compiled directly. */ # define ZEND_INTRIN_SSE4_2_NATIVE 1 -#elif (defined(__i386__) || defined(__x86_64__)) && defined(HAVE_NMMINTRIN_H) || defined(ZEND_WIN32) +#elif (defined(HAVE_FUNC_ATTRIBUTE_TARGET) && defined(PHP_HAVE_SSE4_2)) || defined(ZEND_WIN32) /* Function resolved by ifunc or MINIT. */ # define ZEND_INTRIN_SSE4_2_RESOLVER 1 #endif @@ -544,6 +582,30 @@ static zend_always_inline double _zend_get_nan(void) /* {{{ */ # define ZEND_INTRIN_SSE4_2_FUNC_DECL(func) #endif +#ifdef __AVX2__ +/* Instructions compiled directly. */ +# define ZEND_INTRIN_AVX2_NATIVE 1 +#elif (defined(HAVE_FUNC_ATTRIBUTE_TARGET) && defined(PHP_HAVE_AVX2)) || defined(ZEND_WIN32) +/* Function resolved by ifunc or MINIT. */ +# define ZEND_INTRIN_AVX2_RESOLVER 1 +#endif + +#if ZEND_INTRIN_HAVE_IFUNC_TARGET && (ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER) +# define ZEND_INTRIN_AVX2_FUNC_PROTO 1 +#elif ZEND_INTRIN_AVX2_RESOLVER +# define ZEND_INTRIN_AVX2_FUNC_PTR 1 +#endif + +#if ZEND_INTRIN_AVX2_RESOLVER +# if defined(HAVE_FUNC_ATTRIBUTE_TARGET) +# define ZEND_INTRIN_AVX2_FUNC_DECL(func) ZEND_API func __attribute__((target("avx2"))) +# else +# define ZEND_INTRIN_AVX2_FUNC_DECL(func) func +# endif +#else +# define ZEND_INTRIN_AVX2_FUNC_DECL(func) +#endif + /* Intrinsics macros end. 
*/ #ifdef ZEND_WIN32 diff --git a/acinclude.m4 b/acinclude.m4 index 5c6a5c5f84..b8902f0947 100644 --- a/acinclude.m4 +++ b/acinclude.m4 @@ -3271,7 +3271,7 @@ AC_DEFUN([PHP_CHECK_BUILTIN_CPU_SUPPORTS], [ AC_MSG_CHECKING([for __builtin_cpu_supports]) AC_TRY_LINK(, [ - return __builtin_cpu_supports("sse2")? 1 : 0; + return __builtin_cpu_supports("sse")? 1 : 0; ], [ have_builtin_cpu_supports=1 AC_MSG_RESULT([yes]) @@ -3282,7 +3282,28 @@ AC_DEFUN([PHP_CHECK_BUILTIN_CPU_SUPPORTS], [ AC_DEFINE_UNQUOTED([PHP_HAVE_BUILTIN_CPU_SUPPORTS], [$have_builtin_cpu_supports], [Whether the compiler supports __builtin_cpu_supports]) +]) +dnl PHP_CHECK_CPU_SUPPORTS +AC_DEFUN([PHP_CHECK_CPU_SUPPORTS], [ + AC_REQUIRE([PHP_CHECK_BUILTIN_CPU_INIT]) + AC_REQUIRE([PHP_CHECK_BUILTIN_CPU_SUPPORTS]) + have_ext_instructions=0 + if test $have_builtin_cpu_supports = 1; then + AC_MSG_CHECKING([for $1 instructions supports]) + AC_TRY_RUN([ +int main() { + return __builtin_cpu_supports("$1")? 0 : 1; +} + ], [ + have_ext_instructions=1 + AC_MSG_RESULT([yes]) + ], [ + AC_MSG_RESULT([no]) + ]) + fi + AC_DEFINE_UNQUOTED(AS_TR_CPP([PHP_HAVE_$1_INSTRUCTIONS]), + [$have_ext_instructions], [Whether the compiler supports $1 instructions]) ]) dnl Load the AX_CHECK_COMPILE_FLAG macro from the autoconf archive. 
diff --git a/configure.ac b/configure.ac index 811faa97ed..531e6b4244 100644 --- a/configure.ac +++ b/configure.ac @@ -496,7 +496,9 @@ sys/utsname.h \ sys/ipc.h \ dlfcn.h \ assert.h \ -nmmintrin.h +tmmintrin.h \ +nmmintrin.h \ +immintrin.h ],[],[],[ #ifdef HAVE_SYS_PARAM_H #include <sys/param.h> @@ -571,6 +573,12 @@ PHP_CHECK_BUILTIN_CPU_INIT dnl Check __builtin_cpu_supports PHP_CHECK_BUILTIN_CPU_SUPPORTS +dnl Check instructions +PHP_CHECK_CPU_SUPPORTS([ssse3]) +PHP_CHECK_CPU_SUPPORTS([sse4.2]) +PHP_CHECK_CPU_SUPPORTS([avx]) +PHP_CHECK_CPU_SUPPORTS([avx2]) + dnl Check for members of the stat structure AC_STRUCT_ST_BLKSIZE dnl AC_STRUCT_ST_BLOCKS will screw QNX because fileblocks.o does not exists @@ -590,7 +598,6 @@ AC_TYPE_UID_T dnl Checks for sockaddr_storage and sockaddr.sa_len PHP_SOCKADDR_CHECKS -AC_MSG_CHECKING([checking building environment]) AX_GCC_FUNC_ATTRIBUTE([ifunc]) AX_GCC_FUNC_ATTRIBUTE([target]) diff --git a/ext/standard/base64.c b/ext/standard/base64.c index 06856b8221..ae2b11e188 100644 --- a/ext/standard/base64.c +++ b/ext/standard/base64.c @@ -53,47 +53,439 @@ static const short base64_reverse_table[256] = { }; /* }}} */ -PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length) /* {{{ */ +static zend_always_inline unsigned char *php_base64_encode_impl(const unsigned char *in, size_t inl, unsigned char *out) /* {{{ */ { - const unsigned char *current = str; - unsigned char *p; - zend_string *result; - - result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0); - p = (unsigned char *)ZSTR_VAL(result); - while (length > 2) { /* keep going until we have less than 24 bits */ - *p++ = base64_table[current[0] >> 2]; - *p++ = base64_table[((current[0] & 0x03) << 4) + (current[1] >> 4)]; - *p++ = base64_table[((current[1] & 0x0f) << 2) + (current[2] >> 6)]; - *p++ = base64_table[current[2] & 0x3f]; + while (inl > 2) { /* keep going until we have less than 24 bits */ + *out++ = base64_table[in[0] >> 2]; + *out++ = 
base64_table[((in[0] & 0x03) << 4) + (in[1] >> 4)]; + *out++ = base64_table[((in[1] & 0x0f) << 2) + (in[2] >> 6)]; + *out++ = base64_table[in[2] & 0x3f]; - current += 3; - length -= 3; /* we just handle 3 octets of data */ + in += 3; + inl -= 3; /* we just handle 3 octets of data */ } /* now deal with the tail end of things */ - if (length != 0) { - *p++ = base64_table[current[0] >> 2]; - if (length > 1) { - *p++ = base64_table[((current[0] & 0x03) << 4) + (current[1] >> 4)]; - *p++ = base64_table[(current[1] & 0x0f) << 2]; - *p++ = base64_pad; + if (inl != 0) { + *out++ = base64_table[in[0] >> 2]; + if (inl > 1) { + *out++ = base64_table[((in[0] & 0x03) << 4) + (in[1] >> 4)]; + *out++ = base64_table[(in[1] & 0x0f) << 2]; + *out++ = base64_pad; } else { - *p++ = base64_table[(current[0] & 0x03) << 4]; - *p++ = base64_pad; - *p++ = base64_pad; + *out++ = base64_table[(in[0] & 0x03) << 4]; + *out++ = base64_pad; + *out++ = base64_pad; } } - *p = '\0'; - ZSTR_LEN(result) = (p - (unsigned char *)ZSTR_VAL(result)); + *out = '\0'; + + return out; +} +/* }}} */ + +static zend_always_inline int php_base64_decode_impl(const unsigned char *in, size_t inl, unsigned char *out, size_t *outl, zend_bool strict) /* {{{ */ +{ + int ch, i = 0, padding = 0, j = *outl; + + /* run through the whole string, converting as we go */ + while (inl-- > 0) { + ch = *in++; + if (ch == base64_pad) { + padding++; + continue; + } + + ch = base64_reverse_table[ch]; + if (!strict) { + /* skip unknown characters and whitespace */ + if (ch < 0) { + continue; + } + } else { + /* skip whitespace */ + if (ch == -1) { + continue; + } + /* fail on bad characters or if any data follows padding */ + if (ch == -2 || padding) { + goto fail; + } + } + + switch (i % 4) { + case 0: + out[j] = ch << 2; + break; + case 1: + out[j++] |= ch >> 4; + out[j] = (ch & 0x0f) << 4; + break; + case 2: + out[j++] |= ch >>2; + out[j] = (ch & 0x03) << 6; + break; + case 3: + out[j++] |= ch; + break; + } + i++; + } + + /* fail 
if the input is truncated (only one char in last group) */ + if (strict && i % 4 == 1) { + goto fail; + } + + /* fail if the padding length is wrong (not VV==, VVV=), but accept zero padding + * RFC 4648: "In some circumstances, the use of padding [--] is not required" */ + if (strict && padding && (padding > 2 || (i + padding) % 4 != 0)) { + goto fail; + } + + *outl = j; + out[j] = '\0'; + + return 1; + +fail: + return 0; +} +/* }}} */ + +/* {{{ php_base64_encode */ + +#if ZEND_INTRIN_AVX2_NATIVE +# undef ZEND_INTRIN_SSSE3_NATIVE +# undef ZEND_INTRIN_SSSE3_RESOLVER +# undef ZEND_INTRIN_SSSE3_FUNC_PROTO +# undef ZEND_INTRIN_SSSE3_FUNC_PTR +#elif ZEND_INTRIN_AVX2_FUNC_PROTO && ZEND_INTRIN_SSSE3_NATIVE +# undef ZEND_INTRIN_SSSE3_NATIVE +# define ZEND_INTRIN_SSSE3_RESOLVER 1 +# define ZEND_INTRIN_SSSE3_FUNC_PROTO 1 +# undef ZEND_INTRIN_SSSE3_FUNC_DECL +# ifdef HAVE_FUNC_ATTRIBUTE_TARGET +# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3"))) +# else +# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func +# endif +#elif ZEND_INTRIN_AVX2_FUNC_PTR && ZEND_INTRIN_SSSE3_NATIVE +# undef ZEND_INTRIN_SSSE3_NATIVE +# undef ZEND_INTRIN_SSSE3_RESOLVER +# define ZEND_INTRIN_SSSE3_RESOLVER 1 +# define ZEND_INTRIN_SSSE3_FUNC_PTR 1 +# undef ZEND_INTRIN_SSSE3_FUNC_DECL +# ifdef HAVE_FUNC_ATTRIBUTE_TARGET +# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3"))) +# else +# define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func +# endif +#endif + +#if ZEND_INTRIN_AVX2_NATIVE +# include <immintrin.h> +#elif ZEND_INTRIN_SSSE3_NATIVE +# include <tmmintrin.h> +#elif (ZEND_INTRIN_SSSE3_RESOLVER || ZEND_INTRIN_AVX2_RESOLVER) +# if ZEND_INTRIN_AVX2_RESOLVER +# include <immintrin.h> +# else +# include <tmmintrin.h> +# endif /* (ZEND_INTRIN_SSSE3_RESOLVER || ZEND_INTRIN_AVX2_RESOLVER) */ +# include "Zend/zend_cpuinfo.h" + +# if ZEND_INTRIN_AVX2_RESOLVER +ZEND_INTRIN_AVX2_FUNC_DECL(zend_string *php_base64_encode_avx2(const unsigned char *str, size_t length)); 
+ZEND_INTRIN_AVX2_FUNC_DECL(zend_string *php_base64_decode_ex_avx2(const unsigned char *str, size_t length, zend_bool strict)); +# endif + +# if ZEND_INTRIN_SSSE3_RESOLVER +ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length)); +ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, zend_bool strict)); +# endif + +zend_string *php_base64_encode_default(const unsigned char *str, size_t length); +zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, zend_bool strict); + +# if (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) +PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length) __attribute__((ifunc("resolve_base64_encode"))); +PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict) __attribute__((ifunc("resolve_base64_decode"))); + +static void *resolve_base64_encode() { +# if ZEND_INTRIN_AVX2_FUNC_PROTO + if (zend_cpu_supports_avx2()) { + return php_base64_encode_avx2; + } else +# endif + if (zend_cpu_supports_ssse3()) { + return php_base64_encode_ssse3; + } + return php_base64_encode_default; +} + +static void *resolve_base64_decode() { +# if ZEND_INTRIN_AVX2_FUNC_PROTO + if (zend_cpu_supports_avx2()) { + return php_base64_decode_ex_avx2; + } else +# endif + if (zend_cpu_supports_ssse3()) { + return php_base64_decode_ex_ssse3; + } + return php_base64_decode_ex_default; +} +# else /* (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) */ + +PHPAPI zend_string *(*php_base64_encode)(const unsigned char *str, size_t length) = NULL; +PHPAPI zend_string *(*php_base64_decode_ex)(const unsigned char *str, size_t length, zend_bool strict) = NULL; + +PHP_MINIT_FUNCTION(base64_intrin) +{ +# if ZEND_INTRIN_AVX2_FUNC_PTR + if (zend_cpu_supports_avx2()) { + php_base64_encode = php_base64_encode_avx2; + php_base64_decode_ex = php_base64_decode_ex_avx2; + } 
else +# endif + if (zend_cpu_supports_ssse3()) { + php_base64_encode = php_base64_encode_ssse3; + php_base64_decode_ex = php_base64_decode_ex_ssse3; + } else { + php_base64_encode = php_base64_encode_default; + php_base64_decode_ex = php_base64_decode_ex_default; + } + return SUCCESS; +} +# endif /* (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) */ +#endif /* ZEND_INTRIN_AVX2_NATIVE */ + +#if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER +# if ZEND_INTRIN_AVX2_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET) +static __m256i php_base64_encode_avx2_reshuffle(__m256i in) __attribute__((target("avx2"))); +static __m256i php_base64_encode_avx2_translate(__m256i in) __attribute__((target("avx2"))); +# endif +static __m256i php_base64_encode_avx2_reshuffle(__m256i in) +{ + /* This one works with shifted (4 bytes) input in order to + * be able to work efficiently in the 2 128-bit lanes */ + __m256i t0, t1, t2, t3; + + /* input, bytes MSB to LSB: + * 0 0 0 0 x w v u t s r q p o n m + * l k j i h g f e d c b a 0 0 0 0 */ + in = _mm256_shuffle_epi8(in, _mm256_set_epi8( + 10, 11, 9, 10, + 7, 8, 6, 7, + 4, 5, 3, 4, + 1, 2, 0, 1, + + 14, 15, 13, 14, + 11, 12, 10, 11, + 8, 9, 7, 8, + 5, 6, 4, 5)); + + t0 = _mm256_and_si256(in, _mm256_set1_epi32(0x0fc0fc00)); + + t1 = _mm256_mulhi_epu16(t0, _mm256_set1_epi32(0x04000040)); + + t2 = _mm256_and_si256(in, _mm256_set1_epi32(0x003f03f0)); + + t3 = _mm256_mullo_epi16(t2, _mm256_set1_epi32(0x01000010)); + + return _mm256_or_si256(t1, t3); + /* 00xxxxxx 00wwwwXX 00vvWWWW 00VVVVVV + * 00uuuuuu 00ttttUU 00ssTTTT 00SSSSSS + * 00rrrrrr 00qqqqRR 00ppQQQQ 00PPPPPP + * 00oooooo 00nnnnOO 00mmNNNN 00MMMMMM + * 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ + * 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG + * 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD + * 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA */ +} + +static __m256i php_base64_encode_avx2_translate(__m256i in) +{ + __m256i lut, indices, mask; + + lut = _mm256_setr_epi8( + 65, 71, -4, -4, -4, -4, -4, -4, + 
-4, -4, -4, -4, -19, -16, 0, 0, + 65, 71, -4, -4, -4, -4, -4, -4, + -4, -4, -4, -4, -19, -16, 0, 0); + + indices = _mm256_subs_epu8(in, _mm256_set1_epi8(51)); + + mask = _mm256_cmpgt_epi8(in, _mm256_set1_epi8(25)); + + indices = _mm256_sub_epi8(indices, mask); + + return _mm256_add_epi8(in, _mm256_shuffle_epi8(lut, indices)); + +} +#endif /* ZEND_INTRIN_AVX2_NATIVE || (ZEND_INTRIN_AVX2_RESOLVER && !ZEND_INTRIN_SSSE3_NATIVE) */ + +#if ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER +# if ZEND_INTRIN_SSSE3_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET) +static __m128i php_base64_encode_ssse3_reshuffle(__m128i in) __attribute__((target("ssse3"))); +static __m128i php_base64_encode_ssse3_translate(__m128i in) __attribute__((target("ssse3"))); +# endif + +static __m128i php_base64_encode_ssse3_reshuffle(__m128i in) +{ + __m128i t0, t1, t2, t3; + + /* input, bytes MSB to LSB: + * 0 0 0 0 l k j i h g f e d c b a */ + in = _mm_shuffle_epi8(in, _mm_set_epi8( + 10, 11, 9, 10, + 7, 8, 6, 7, + 4, 5, 3, 4, + 1, 2, 0, 1)); + + t0 = _mm_and_si128(in, _mm_set1_epi32(0x0fc0fc00)); + + t1 = _mm_mulhi_epu16(t0, _mm_set1_epi32(0x04000040)); + + t2 = _mm_and_si128(in, _mm_set1_epi32(0x003f03f0)); + + t3 = _mm_mullo_epi16(t2, _mm_set1_epi32(0x01000010)); + + /* output (upper case are MSB, lower case are LSB): + * 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ + * 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG + * 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD + * 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA */ + return _mm_or_si128(t1, t3); +} + +static __m128i php_base64_encode_ssse3_translate(__m128i in) +{ + __m128i mask, indices; + __m128i lut = _mm_setr_epi8( + 65, 71, -4, -4, + -4, -4, -4, -4, + -4, -4, -4, -4, + -19, -16, 0, 0 + ); + + /* Translate values 0..63 to the Base64 alphabet. 
There are five sets: + * # From To Abs Index Characters + * 0 [0..25] [65..90] +65 0 ABCDEFGHIJKLMNOPQRSTUVWXYZ + * 1 [26..51] [97..122] +71 1 abcdefghijklmnopqrstuvwxyz + * 2 [52..61] [48..57] -4 [2..11] 0123456789 + * 3 [62] [43] -19 12 + + * 4 [63] [47] -16 13 / */ + + /* Create LUT indices from input: + * the index for range #0 is right, others are 1 less than expected: */ + indices = _mm_subs_epu8(in, _mm_set1_epi8(51)); + + /* mask is 0xFF (-1) for range #[1..4] and 0x00 for range #0: */ + mask = _mm_cmpgt_epi8(in, _mm_set1_epi8(25)); + + /* subtract -1, so add 1 to indices for range #[1..4]; all indices are now correct: */ + indices = _mm_sub_epi8(indices, mask); + + /* Add offsets to input values: */ + return _mm_add_epi8(in, _mm_shuffle_epi8(lut, indices)); +} +#endif /* ZEND_INTRIN_SSSE3_NATIVE || (ZEND_INTRIN_SSSE3_RESOLVER && !ZEND_INTRIN_AVX2_NATIVE) */ + +#if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER +# if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_SSSE3_NATIVE +PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length) +# elif ZEND_INTRIN_AVX2_RESOLVER +zend_string *php_base64_encode_avx2(const unsigned char *str, size_t length) +# elif ZEND_INTRIN_SSSE3_RESOLVER +zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length) +# endif +{ + const unsigned char *c = str; + unsigned char *o; + zend_string *result; + + result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0); + o = (unsigned char *)ZSTR_VAL(result); +# if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER + if (length > 31) { + __m256i s = _mm256_loadu_si256((__m256i *)c); + + s = _mm256_permutevar8x32_epi32(s, _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6)); + + for (;;) { + s = php_base64_encode_avx2_reshuffle(s); + + s = php_base64_encode_avx2_translate(s); + + _mm256_storeu_si256((__m256i *)o, s); + c += 24; + o += 32; + length -= 24; + if (length < 28) { + break; + } 
+ s = _mm256_loadu_si256((__m256i *)(c - 4)); + } + } +# else + while (length > 15) { + __m128i s = _mm_loadu_si128((__m128i *)c); + + s = php_base64_encode_ssse3_reshuffle(s); + + s = php_base64_encode_ssse3_translate(s); + + _mm_storeu_si128((__m128i *)o, s); + c += 12; + o += 16; + length -= 12; + } +# endif + + o = php_base64_encode_impl(c, length, o); + + ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result)); return result; } + +# if ZEND_INTRIN_SSSE3_RESOLVER && ZEND_INTRIN_AVX2_RESOLVER +zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length) +{ + const unsigned char *c = str; + unsigned char *o; + zend_string *result; + + result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0); + o = (unsigned char *)ZSTR_VAL(result); + while (length > 15) { + __m128i s = _mm_loadu_si128((__m128i *)c); + + s = php_base64_encode_ssse3_reshuffle(s); + + s = php_base64_encode_ssse3_translate(s); + + _mm_storeu_si128((__m128i *)o, s); + c += 12; + o += 16; + length -= 12; + } + + o = php_base64_encode_impl(c, length, o); + + ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result)); + + return result; +} +# endif +#endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */ + /* }}} */ -/* {{{ */ +/* {{{ php_base64_decode_ex */ /* generate reverse table (do not set index 0 to 64) static unsigned short base64_reverse_table[256]; #define rt base64_reverse_table @@ -125,78 +517,300 @@ void php_base64_init(void) efree(s); } */ -/* }}} */ -PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict) /* {{{ */ +#if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER +# if ZEND_INTRIN_AVX2_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET) +static __m256i php_base64_decode_avx2_reshuffle(__m256i in) __attribute__((target("avx2"))); +# endif + +static __m256i php_base64_decode_avx2_reshuffle(__m256i in) { - const unsigned char *current = 
str; - int ch, i = 0, j = 0, padding = 0; + __m256i merge_ab_and_bc, out; + + merge_ab_and_bc = _mm256_maddubs_epi16(in, _mm256_set1_epi32(0x01400140)); + + out = _mm256_madd_epi16(merge_ab_and_bc, _mm256_set1_epi32(0x00011000)); + + out = _mm256_shuffle_epi8(out, _mm256_setr_epi8( + 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1, + 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1)); + + return _mm256_permutevar8x32_epi32(out, _mm256_setr_epi32(0, 1, 2, 4, 5, 6, -1, -1)); +} +#endif + +#if ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER +# if ZEND_INTRIN_SSSE3_RESOLVER && defined(HAVE_FUNC_ATTRIBUTE_TARGET) +static __m128i php_base64_decode_ssse3_reshuffle(__m128i in) __attribute__((target("ssse3"))); +# endif + +static __m128i php_base64_decode_ssse3_reshuffle(__m128i in) +{ + __m128i merge_ab_and_bc, out; + + merge_ab_and_bc = _mm_maddubs_epi16(in, _mm_set1_epi32(0x01400140)); + /* 0000kkkk LLllllll 0000JJJJ JJjjKKKK + * 0000hhhh IIiiiiii 0000GGGG GGggHHHH + * 0000eeee FFffffff 0000DDDD DDddEEEE + * 0000bbbb CCcccccc 0000AAAA AAaaBBBB */ + + out = _mm_madd_epi16(merge_ab_and_bc, _mm_set1_epi32(0x00011000)); + /* 00000000 JJJJJJjj KKKKkkkk LLllllll + * 00000000 GGGGGGgg HHHHhhhh IIiiiiii + * 00000000 DDDDDDdd EEEEeeee FFffffff + * 00000000 AAAAAAaa BBBBbbbb CCcccccc */ + + return _mm_shuffle_epi8(out, _mm_setr_epi8( + 2, 1, 0, + 6, 5, 4, + 10, 9, 8, + 14, 13, 12, + -1, -1, -1, -1)); + /* 00000000 00000000 00000000 00000000 + * LLllllll KKKKkkkk JJJJJJjj IIiiiiii + * HHHHhhhh GGGGGGgg FFffffff EEEEeeee + * DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa */ +} +#endif + +#if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER +# if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_SSSE3_NATIVE +PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict) +# elif ZEND_INTRIN_AVX2_RESOLVER +zend_string *php_base64_decode_ex_avx2(const unsigned char *str, size_t length, 
zend_bool strict) +# else +zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, zend_bool strict) +# endif +{ + const unsigned char *c = str; + unsigned char *o; + size_t outl = 0; zend_string *result; result = zend_string_alloc(length, 0); + o = (unsigned char *)ZSTR_VAL(result); - /* run through the whole string, converting as we go */ - while (length-- > 0) { - ch = *current++; - if (ch == base64_pad) { - padding++; - continue; - } + /* See: "Faster Base64 Encoding and Decoding using AVX2 Instructions" + * https://arxiv.org/pdf/1704.00605.pdf */ +# if ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER + while (length > 31 + 11 + 2) { + __m256i lut_lo, lut_hi, lut_roll; + __m256i hi_nibbles, lo_nibbles, hi, lo; + __m256i str = _mm256_loadu_si256((__m256i *)c); - ch = base64_reverse_table[ch]; - if (!strict) { - /* skip unknown characters and whitespace */ - if (ch < 0) { - continue; - } + lut_lo = _mm256_setr_epi8( + 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A, + 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A); + + lut_hi = _mm256_setr_epi8( + 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10); + + lut_roll = _mm256_setr_epi8( + 0, 16, 19, 4, -65, -65, -71, -71, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 16, 19, 4, -65, -65, -71, -71, + 0, 0, 0, 0, 0, 0, 0, 0); + + hi_nibbles = _mm256_and_si256(_mm256_srli_epi32(str, 4), _mm256_set1_epi8(0x2f)); + lo_nibbles = _mm256_and_si256(str, _mm256_set1_epi8(0x2f)); + hi = _mm256_shuffle_epi8(lut_hi, hi_nibbles); + lo = _mm256_shuffle_epi8(lut_lo, lo_nibbles); + + if (!_mm256_testz_si256(lo, hi)) { + break; } else { - /* skip whitespace */ - if (ch == -1) { - continue; - } - /* fail on bad characters or if any data follows padding */ - if (ch == -2 || padding) 
{ - goto fail; - } + __m256i eq_2f, roll; + eq_2f = _mm256_cmpeq_epi8(str, _mm256_set1_epi8(0x2f)); + roll = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2f, hi_nibbles)); + + + str = _mm256_add_epi8(str, roll); + + str = php_base64_decode_avx2_reshuffle(str); + + _mm256_storeu_si256((__m256i *)o, str); + + c += 32; + o += 24; + outl += 24; + length -= 32; } + } +# else + while (length > 15 + 6 + 2) { + __m128i lut_lo, lut_hi, lut_roll; + __m128i hi_nibbles, lo_nibbles, hi, lo; - switch(i % 4) { - case 0: - ZSTR_VAL(result)[j] = ch << 2; - break; - case 1: - ZSTR_VAL(result)[j++] |= ch >> 4; - ZSTR_VAL(result)[j] = (ch & 0x0f) << 4; - break; - case 2: - ZSTR_VAL(result)[j++] |= ch >>2; - ZSTR_VAL(result)[j] = (ch & 0x03) << 6; - break; - case 3: - ZSTR_VAL(result)[j++] |= ch; + __m128i s = _mm_loadu_si128((__m128i *)c); + + lut_lo = _mm_setr_epi8( + 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A); + + lut_hi = _mm_setr_epi8( + 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10); + + lut_roll = _mm_setr_epi8( + 0, 16, 19, 4, -65, -65, -71, -71, + 0, 0, 0, 0, 0, 0, 0, 0); + + hi_nibbles = _mm_and_si128(_mm_srli_epi32(s, 4), _mm_set1_epi8(0x2f)); + lo_nibbles = _mm_and_si128(s, _mm_set1_epi8(0x2f)); + hi = _mm_shuffle_epi8(lut_hi, hi_nibbles); + lo = _mm_shuffle_epi8(lut_lo, lo_nibbles); + + /* Check for invalid input: if any "and" values from lo and hi are not zero, + fall back on bytewise code to do error checking and reporting: */ + if (UNEXPECTED(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_and_si128(lo, hi), _mm_set1_epi8(0))) != 0)) { break; + } else { + __m128i eq_2f, roll; + + eq_2f = _mm_cmpeq_epi8(s, _mm_set1_epi8(0x2f)); + roll = _mm_shuffle_epi8(lut_roll, _mm_add_epi8(eq_2f, hi_nibbles)); + + s = _mm_add_epi8(s, roll); + + s = php_base64_decode_ssse3_reshuffle(s); + + _mm_storeu_si128((__m128i *)o, s); + + c += 16; + o += 12; + outl += 12; + length -= 16; } - i++; } 
- /* fail if the input is truncated (only one char in last group) */ - if (strict && i % 4 == 1) { - goto fail; +# endif + + if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) { + zend_string_free(result); + return NULL; } - /* fail if the padding length is wrong (not VV==, VVV=), but accept zero padding - * RFC 4648: "In some circumstances, the use of padding [--] is not required" */ - if (strict && padding && (padding > 2 || (i + padding) % 4 != 0)) { - goto fail; + + ZSTR_LEN(result) = outl; + + return result; +} + +# if ZEND_INTRIN_SSSE3_RESOLVER && ZEND_INTRIN_AVX2_RESOLVER +zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, zend_bool strict) +{ + const unsigned char *c = str; + unsigned char *o; + size_t outl = 0; + zend_string *result; + + result = zend_string_alloc(length, 0); + o = (unsigned char *)ZSTR_VAL(result); + + while (length > 15 + 2) { + __m128i lut_lo, lut_hi, lut_roll; + __m128i hi_nibbles, lo_nibbles, hi, lo; + + __m128i s = _mm_loadu_si128((__m128i *)c); + + lut_lo = _mm_setr_epi8( + 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, + 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A); + + lut_hi = _mm_setr_epi8( + 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10); + + lut_roll = _mm_setr_epi8( + 0, 16, 19, 4, -65, -65, -71, -71, + 0, 0, 0, 0, 0, 0, 0, 0); + + hi_nibbles = _mm_and_si128(_mm_srli_epi32(s, 4), _mm_set1_epi8(0x2f)); + lo_nibbles = _mm_and_si128(s, _mm_set1_epi8(0x2f)); + hi = _mm_shuffle_epi8(lut_hi, hi_nibbles); + lo = _mm_shuffle_epi8(lut_lo, lo_nibbles); + + /* Check for invalid input: if any "and" values from lo and hi are not zero, + fall back on bytewise code to do error checking and reporting: */ + if (UNEXPECTED(_mm_movemask_epi8(_mm_cmpgt_epi8(_mm_and_si128(lo, hi), _mm_set1_epi8(0))) != 0)) { + break; + } else { + __m128i eq_2f, roll; + + eq_2f = _mm_cmpeq_epi8(s, _mm_set1_epi8(0x2f)); + roll = 
_mm_shuffle_epi8(lut_roll, _mm_add_epi8(eq_2f, hi_nibbles)); + + s = _mm_add_epi8(s, roll); + + s = php_base64_decode_ssse3_reshuffle(s); + + _mm_storeu_si128((__m128i *)o, s); + + c += 16; + o += 12; + outl += 12; + length -= 16; + } + } + + if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) { + zend_string_free(result); + return NULL; } - ZSTR_LEN(result) = j; - ZSTR_VAL(result)[ZSTR_LEN(result)] = '\0'; + ZSTR_LEN(result) = outl; return result; +} +# endif +#endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */ -fail: - zend_string_free(result); - return NULL; +#if !ZEND_INTRIN_AVX2_NATIVE && !ZEND_INTRIN_SSSE3_NATIVE +#if ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_RESOLVER +zend_string *php_base64_encode_default(const unsigned char *str, size_t length) +#else +PHPAPI zend_string *php_base64_encode(const unsigned char *str, size_t length) +#endif +{ + unsigned char *p; + zend_string *result; + + result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0); + p = (unsigned char *)ZSTR_VAL(result); + + p = php_base64_encode_impl(str, length, p); + + ZSTR_LEN(result) = (p - (unsigned char *)ZSTR_VAL(result)); + + return result; +} +#endif + +#if !ZEND_INTRIN_AVX2_NATIVE && !ZEND_INTRIN_SSSE3_NATIVE +#if ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_RESOLVER +zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, zend_bool strict) +#else +PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, zend_bool strict) +#endif +{ + zend_string *result; + size_t outl = 0; + + result = zend_string_alloc(length, 0); + + if (!php_base64_decode_impl(str, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) { + zend_string_free(result); + return NULL; + } + + ZSTR_LEN(result) = outl; + + return result; } +#endif /* }}} */ /* {{{ proto string base64_encode(string str) diff --git 
a/ext/standard/base64.h b/ext/standard/base64.h index f380d3c888..c9fe26e985 100644 --- a/ext/standard/base64.h +++ b/ext/standard/base64.h @@ -21,16 +21,56 @@ #ifndef BASE64_H #define BASE64_H +/* + * The SSSE3 and AVX2 implementations are based on https://github.com/aklomp/base64, + * which is copyrighted by: + * + * Copyright (c) 2005-2007, Nick Galbreath + * Copyright (c) 2013-2017, Alfred Klomp + * Copyright (c) 2015-2017, Wojciech Mula + * Copyright (c) 2016-2017, Matthieu Darbois + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
 PHP_FUNCTION(base64_decode);
 PHP_FUNCTION(base64_encode);
+#if (ZEND_INTRIN_AVX2_FUNC_PTR || ZEND_INTRIN_SSSE3_FUNC_PTR) && !ZEND_INTRIN_AVX2_NATIVE /*
+ * In this configuration php_base64_encode and php_base64_decode_ex are
+ * declared as function pointers instead of functions, and the base64_intrin
+ * MINIT hook is declared. Presumably that hook assigns the pointers at
+ * startup; its definition is not part of this hunk - confirm.
+ */
+PHP_MINIT_FUNCTION(base64_intrin);
+PHPAPI extern zend_string *(*php_base64_encode)(const unsigned char *, size_t);
+PHPAPI extern zend_string *(*php_base64_decode_ex)(const unsigned char *, size_t, zend_bool);
+#else /* otherwise both names are ordinary PHPAPI functions */
 PHPAPI extern zend_string *php_base64_encode(const unsigned char *, size_t);
+PHPAPI extern zend_string *php_base64_decode_ex(const unsigned char *, size_t, zend_bool);
+#endif
+/*
+ * The static inline helpers below are thin wrappers: php_base64_encode_str()
+ * encodes the bytes of a zend_string, and php_base64_decode() calls
+ * php_base64_decode_ex() with strict set to 0. The call syntax is the same
+ * for a function and for a function pointer, so they work with either set of
+ * declarations above.
+ */
 static inline zend_string *php_base64_encode_str(const zend_string *str) {
 	return php_base64_encode((const unsigned char*)(ZSTR_VAL(str)), ZSTR_LEN(str));
 }
-PHPAPI extern zend_string *php_base64_decode_ex(const unsigned char *, size_t, zend_bool);
-
 static inline zend_string *php_base64_decode(const unsigned char *str, size_t len) {
 	return php_base64_decode_ex(str, len, 0);
 }
diff --git a/ext/standard/basic_functions.c b/ext/standard/basic_functions.c
index b322caa65e..4404ad1df0 100644
--- a/ext/standard/basic_functions.c
+++ b/ext/standard/basic_functions.c
@@ -3692,6 +3692,10 @@ PHP_MINIT_FUNCTION(basic) /* {{{ */
 	BASIC_MINIT_SUBMODULE(string_intrin)
 #endif
+#if ZEND_INTRIN_AVX2_FUNC_PTR || ZEND_INTRIN_SSSE3_FUNC_PTR /*
+ * NOTE(review): base64.h declares the base64_intrin hook only when this
+ * condition holds and ZEND_INTRIN_AVX2_NATIVE is not set. If a build can
+ * define a FUNC_PTR macro together with ZEND_INTRIN_AVX2_NATIVE, the line
+ * below would presumably reference an undeclared hook - confirm that this
+ * combination cannot occur.
+ */
+	BASIC_MINIT_SUBMODULE(base64_intrin)
+#endif
+
 	BASIC_MINIT_SUBMODULE(crypt)
 	BASIC_MINIT_SUBMODULE(lcg)
diff --git a/ext/standard/string.c b/ext/standard/string.c
index 73bbb24420..0023c4ba3b 100644
--- a/ext/standard/string.c
+++ b/ext/standard/string.c
@@ -3873,7 +3873,7 @@ zend_string *php_addslashes_default(zend_string *str, int should_free);
 PHPAPI zend_string *php_addslashes(zend_string *str, int should_free) __attribute__((ifunc("resolve_addslashes")));
 static void *resolve_addslashes() {
-	if (zend_cpu_support_sse42()) {
+	if (zend_cpu_supports_sse42()) {
 		return php_addslashes_sse42;
 	}
 	return php_addslashes_default;