From: Sebastian Pop Date: Wed, 1 May 2019 16:05:11 +0000 (+0000) Subject: [AArch64] Use crc32 instructions when available X-Git-Tag: php-7.4.0beta1~169 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2a535a9707c89502df8bc0bd785f2e9192929422;p=php [AArch64] Use crc32 instructions when available The time goes from 0.838s down to 0.029s (a 28x speedup) on a Graviton A1 instance and the following benchmark: function simple_crc32() { $a = "foo"; for ($i = 0; $i < 10000; $i++) { crc32($a); $a .= "o".$i; } } --- diff --git a/ext/standard/crc32.c b/ext/standard/crc32.c index fb6e85ab07..904ea25ff7 100644 --- a/ext/standard/crc32.c +++ b/ext/standard/crc32.c @@ -20,6 +20,32 @@ #include "basic_functions.h" #include "crc32.h" +#if defined(__aarch64__) +# pragma GCC target ("+nothing+crc") +# include <arm_acle.h> +# if defined(__linux__) +# include <asm/hwcap.h> +# include <sys/auxv.h> +# endif + +static inline int has_crc32_insn() { + /* Only go through the runtime detection once. */ + static int res = -1; + if (res != -1) + return res; +# if defined(HWCAP_CRC32) + res = getauxval(AT_HWCAP) & HWCAP_CRC32; + return res; +# elif defined(HWCAP2_CRC32) + res = getauxval(AT_HWCAP2) & HWCAP2_CRC32; + return res; +# else + res = 0; + return res; +# endif +} +#endif + /* {{{ proto string crc32(string str) Calculate the crc32 polynomial of a string */ PHP_NAMED_FUNCTION(php_if_crc32) @@ -35,6 +61,30 @@ PHP_NAMED_FUNCTION(php_if_crc32) crc = crcinit^0xFFFFFFFF; +#if defined(__aarch64__) + if (has_crc32_insn()) { + while(nr >= sizeof(uint64_t)) { + crc = __crc32d(crc, *(uint64_t *)p); + p += sizeof(uint64_t); + nr -= sizeof(uint64_t); + } + if (nr >= sizeof(int32_t)) { + crc = __crc32w(crc, *(uint32_t *)p); + p += sizeof(uint32_t); + nr -= sizeof(uint32_t); + } + if (nr >= sizeof(int16_t)) { + crc = __crc32h(crc, *(uint16_t *)p); + p += sizeof(uint16_t); + nr -= sizeof(uint16_t); + } + if (nr) { + crc = __crc32b(crc, *p); + p += sizeof(uint8_t); + nr -= sizeof(uint8_t); + } + } +#endif for (; nr--; ++p) { 
crc = ((crc >> 8) & 0x00FFFFFF) ^ crc32tab[(crc ^ (*p)) & 0xFF ]; }