From adc3b720765f5c922f904279151b2af298080901 Mon Sep 17 00:00:00 2001 From: Sebastian Pop Date: Thu, 25 Apr 2019 14:44:02 +0000 Subject: [PATCH] speed up add and sub operators with overflow detection On A72, google-benchmark measurements before and after the patch: -------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------- BM_add_before 13.3 ns 13.3 ns 52626058 BM_sub_before 8.72 ns 8.72 ns 80259343 BM_add_after 4.80 ns 4.80 ns 145926004 BM_sub_after 4.80 ns 4.80 ns 145936496 Before the patch: fast_long_add_function: ldr x1, [x1] ldr x2, [x2] add x3, x1, x2 eor x4, x1, x2 tbz x4, #63, .L5 .L2: mov w1, 4 str x3, [x0] str w1, [x0, 8] ret .p2align 2 .L5: eor x4, x1, x3 tbz x4, #63, .L2 scvtf d0, x1 scvtf d1, x2 mov w1, 5 str w1, [x0, 8] fadd d0, d0, d1 str d0, [x0] ret With the patch: fast_long_add_function: ldr x5, [x1] ldr x6, [x2] adds x5, x5, x6 bvs .L2 mov w6, 4 str x5, [x0] str w6, [x0, 8] ret .L2: ldr x1, [x1] mov w3, 5 ldr x2, [x2] str w3, [x0, 8] scvtf d0, x1 scvtf d1, x2 fadd d0, d0, d1 str d0, [x0] ret php$ ./sapi/cli/php Zend/bench.php Base: Patch: simple 0.091 simple 0.091 simplecall 0.014 simplecall 0.014 simpleucall 0.041 simpleucall 0.041 simpleudcall 0.045 simpleudcall 0.045 mandel 0.193 mandel 0.193 mandel2 0.229 mandel2 0.229 ackermann(7) 0.044 ackermann(7) 0.044 ary(50000) 0.010 ary(50000) 0.010 ary2(50000) 0.008 ary2(50000) 0.008 ary3(2000) 0.096 ary3(2000) 0.095 fibo(30) 0.149 fibo(30) 0.148 hash1(50000) 0.016 hash1(50000) 0.016 hash2(500) 0.020 hash2(500) 0.020 heapsort(20000) 0.055 heapsort(20000) 0.054 matrix(20) 0.057 matrix(20) 0.057 nestedloop(12) 0.091 nestedloop(12) 0.091 sieve(30) 0.032 sieve(30) 0.032 strcat(200000) 0.010 strcat(200000) 0.010 ------------------------ ------------------------ Total 1.199 Total 1.197 php$ ./sapi/cli/php Zend/micro_bench.php Base: Patch: empty_loop 0.051 empty_loop 0.051 func() 0.181 0.130 func() 0.181 0.130 undef_func() 0.186 
0.135 undef_func() 0.186 0.135 int_func() 0.116 0.064 int_func() 0.116 0.064 $x = self::$x 0.235 0.183 $x = self::$x 0.233 0.182 self::$x = 0 0.198 0.147 self::$x = 0 0.198 0.147 isset(self::$x) 0.229 0.178 isset(self::$x) 0.229 0.178 empty(self::$x) 0.231 0.180 empty(self::$x) 0.231 0.180 $x = Foo::$x 0.144 0.093 $x = Foo::$x 0.144 0.093 Foo::$x = 0 0.107 0.056 Foo::$x = 0 0.107 0.056 isset(Foo::$x) 0.140 0.088 isset(Foo::$x) 0.140 0.088 empty(Foo::$x) 0.148 0.097 empty(Foo::$x) 0.148 0.097 self::f() 0.238 0.187 self::f() 0.238 0.187 Foo::f() 0.209 0.158 Foo::f() 0.209 0.158 $x = $this->x 0.123 0.072 $x = $this->x 0.123 0.072 $this->x = 0 0.124 0.073 $this->x = 0 0.124 0.073 $this->x += 2 0.151 0.099 $this->x += 2 0.153 0.101 ++$this->x 0.137 0.086 ++$this->x 0.138 0.086 --$this->x 0.137 0.086 --$this->x 0.138 0.086 $this->x++ 0.170 0.119 $this->x++ 0.172 0.121 $this->x-- 0.171 0.119 $this->x-- 0.172 0.121 isset($this->x) 0.170 0.119 isset($this->x) 0.170 0.119 empty($this->x) 0.179 0.128 empty($this->x) 0.179 0.128 $this->f() 0.194 0.143 $this->f() 0.194 0.143 $x = Foo::TEST 0.188 0.137 $x = Foo::TEST 0.188 0.136 new Foo() 0.482 0.431 new Foo() 0.479 0.427 $x = TEST 0.109 0.058 $x = TEST 0.109 0.058 $x = $_GET 0.190 0.138 $x = $_GET 0.190 0.139 $x = $GLOBALS['v'] 0.242 0.191 $x = $GLOBALS['v'] 0.242 0.191 $x = $hash['v'] 0.196 0.145 $x = $hash['v'] 0.196 0.145 $x = $str[0] 0.146 0.094 $x = $str[0] 0.145 0.094 $x = $a ?: null 0.144 0.093 $x = $a ?: null 0.144 0.093 $x = $f ?: tmp 0.174 0.123 $x = $f ?: tmp 0.174 0.123 $x = $f ? $f : $a 0.153 0.101 $x = $f ? $f : $a 0.153 0.101 $x = $f ? $f : tmp 0.148 0.097 $x = $f ? 
$f : tmp 0.148 0.097 ------------------------ ------------------------ Total 6.143 Total 6.143 --- Zend/zend_operators.h | 40 ++++++++++++++++++++++++++++++++++++++++ configure.ac | 4 ++++ 2 files changed, 44 insertions(+) diff --git a/Zend/zend_operators.h b/Zend/zend_operators.h index 6fe0bddc3d..869f575efa 100644 --- a/Zend/zend_operators.h +++ b/Zend/zend_operators.h @@ -604,6 +604,26 @@ overflow: ZEND_ATTRIBUTE_COLD_LABEL return; overflow: ZEND_ATTRIBUTE_COLD_LABEL ZVAL_DOUBLE(result, (double) Z_LVAL_P(op1) + (double) Z_LVAL_P(op2)); +#elif defined(HAVE_ASM_GOTO) && defined(__aarch64__) + __asm__ goto( + "ldr x5, [%1]\n\t" + "ldr x6, [%2]\n\t" + "adds x5, x5, x6\n\t" + "bvs %l5\n\t" + "mov w6, %3\n\t" + "str x5, [%0]\n\t" + "str w6, [%0, %c4]\n" + : + : "r"(&result->value), + "r"(&op1->value), + "r"(&op2->value), + "n"(IS_LONG), + "n"(ZVAL_OFFSETOF_TYPE) + : "x5", "x6", "cc", "memory" + : overflow); + return; +overflow: ZEND_ATTRIBUTE_COLD_LABEL + ZVAL_DOUBLE(result, (double) Z_LVAL_P(op1) + (double) Z_LVAL_P(op2)); #elif PHP_HAVE_BUILTIN_SADDL_OVERFLOW && SIZEOF_LONG == SIZEOF_ZEND_LONG long lresult; if (UNEXPECTED(__builtin_saddl_overflow(Z_LVAL_P(op1), Z_LVAL_P(op2), &lresult))) { @@ -694,6 +714,26 @@ overflow: ZEND_ATTRIBUTE_COLD_LABEL return; overflow: ZEND_ATTRIBUTE_COLD_LABEL ZVAL_DOUBLE(result, (double) Z_LVAL_P(op1) - (double) Z_LVAL_P(op2)); +#elif defined(HAVE_ASM_GOTO) && defined(__aarch64__) + __asm__ goto( + "ldr x5, [%1]\n\t" + "ldr x6, [%2]\n\t" + "subs x5, x5, x6\n\t" + "bvs %l5\n\t" + "mov w6, %3\n\t" + "str x5, [%0]\n\t" + "str w6, [%0, %c4]\n" + : + : "r"(&result->value), + "r"(&op1->value), + "r"(&op2->value), + "n"(IS_LONG), + "n"(ZVAL_OFFSETOF_TYPE) + : "x5", "x6", "cc", "memory" + : overflow); + return; +overflow: ZEND_ATTRIBUTE_COLD_LABEL + ZVAL_DOUBLE(result, (double) Z_LVAL_P(op1) - (double) Z_LVAL_P(op2)); #elif PHP_HAVE_BUILTIN_SSUBL_OVERFLOW && SIZEOF_LONG == SIZEOF_ZEND_LONG long lresult; if 
(UNEXPECTED(__builtin_ssubl_overflow(Z_LVAL_P(op1), Z_LVAL_P(op2), &lresult))) { diff --git a/configure.ac b/configure.ac index 199e2435a5..80628a5df1 100644 --- a/configure.ac +++ b/configure.ac @@ -737,7 +737,11 @@ dnl Check for asm goto support AC_CACHE_CHECK([for asm goto], ac_cv__asm_goto, [AC_RUN_IFELSE([AC_LANG_SOURCE([[ int main(void) { +#if defined(__x86_64__) || defined(__i386__) __asm__ goto("jmp %l0\n" :::: end); +#elif defined(__aarch64__) + __asm__ goto("b %l0\n" :::: end); +#endif end: return 0; } -- 2.40.0