On A72, google-benchmark measurements before and after the patch:
--------------------------------------------------------
Benchmark               Time           CPU    Iterations
--------------------------------------------------------
BM_add_before        13.3 ns       13.3 ns      52626058
BM_sub_before        8.72 ns       8.72 ns      80259343
BM_add_after         4.80 ns       4.80 ns     145926004
BM_sub_after         4.80 ns       4.80 ns     145936496
Generated AArch64 code for fast_long_add_function before the patch:
fast_long_add_function:
ldr x1, [x1]
ldr x2, [x2]
add x3, x1, x2
eor x4, x1, x2
tbz x4, #63, .L5
.L2:
mov w1, 4
str x3, [x0]
str w1, [x0, 8]
ret
.p2align 2
.L5:
eor x4, x1, x3
tbz x4, #63, .L2
scvtf d0, x1
scvtf d1, x2
mov w1, 5
str w1, [x0, 8]
fadd d0, d0, d1
str d0, [x0]
ret
Generated AArch64 code for fast_long_add_function with the patch:
fast_long_add_function:
ldr x5, [x1]
ldr x6, [x2]
adds x5, x5, x6
bvs .L2
mov w6, 4
str x5, [x0]
str w6, [x0, 8]
ret
.L2:
ldr x1, [x1]
mov w3, 5
ldr x2, [x2]
str w3, [x0, 8]
scvtf d0, x1
scvtf d1, x2
fadd d0, d0, d1
str d0, [x0]
ret
php$ ./sapi/cli/php Zend/bench.php
Base: Patch:
simple 0.091 simple 0.091
simplecall 0.014 simplecall 0.014
simpleucall 0.041 simpleucall 0.041
simpleudcall 0.045 simpleudcall 0.045
mandel 0.193 mandel 0.193
mandel2 0.229 mandel2 0.229
ackermann(7) 0.044 ackermann(7) 0.044
ary(50000) 0.010 ary(50000) 0.010
ary2(50000) 0.008 ary2(50000) 0.008
ary3(2000) 0.096 ary3(2000) 0.095
fibo(30) 0.149 fibo(30) 0.148
hash1(50000) 0.016 hash1(50000) 0.016
hash2(500) 0.020 hash2(500) 0.020
heapsort(20000) 0.055 heapsort(20000) 0.054
matrix(20) 0.057 matrix(20) 0.057
nestedloop(12) 0.091 nestedloop(12) 0.091
sieve(30) 0.032 sieve(30) 0.032
strcat(200000) 0.010 strcat(200000) 0.010
------------------------ ------------------------
Total 1.199 Total 1.197
php$ ./sapi/cli/php Zend/micro_bench.php
Base: Patch:
empty_loop 0.051 empty_loop 0.051
func() 0.181 0.130 func() 0.181 0.130
undef_func() 0.186 0.135 undef_func() 0.186 0.135
int_func() 0.116 0.064 int_func() 0.116 0.064
$x = self::$x 0.235 0.183 $x = self::$x 0.233 0.182
self::$x = 0 0.198 0.147 self::$x = 0 0.198 0.147
isset(self::$x) 0.229 0.178 isset(self::$x) 0.229 0.178
empty(self::$x) 0.231 0.180 empty(self::$x) 0.231 0.180
$x = Foo::$x 0.144 0.093 $x = Foo::$x 0.144 0.093
Foo::$x = 0 0.107 0.056 Foo::$x = 0 0.107 0.056
isset(Foo::$x) 0.140 0.088 isset(Foo::$x) 0.140 0.088
empty(Foo::$x) 0.148 0.097 empty(Foo::$x) 0.148 0.097
self::f() 0.238 0.187 self::f() 0.238 0.187
Foo::f() 0.209 0.158 Foo::f() 0.209 0.158
$x = $this->x 0.123 0.072 $x = $this->x 0.123 0.072
$this->x = 0 0.124 0.073 $this->x = 0 0.124 0.073
$this->x += 2 0.151 0.099 $this->x += 2 0.153 0.101
++$this->x 0.137 0.086 ++$this->x 0.138 0.086
--$this->x 0.137 0.086 --$this->x 0.138 0.086
$this->x++ 0.170 0.119 $this->x++ 0.172 0.121
$this->x-- 0.171 0.119 $this->x-- 0.172 0.121
isset($this->x) 0.170 0.119 isset($this->x) 0.170 0.119
empty($this->x) 0.179 0.128 empty($this->x) 0.179 0.128
$this->f() 0.194 0.143 $this->f() 0.194 0.143
$x = Foo::TEST 0.188 0.137 $x = Foo::TEST 0.188 0.136
new Foo() 0.482 0.431 new Foo() 0.479 0.427
$x = TEST 0.109 0.058 $x = TEST 0.109 0.058
$x = $_GET 0.190 0.138 $x = $_GET 0.190 0.139
$x = $GLOBALS['v'] 0.242 0.191 $x = $GLOBALS['v'] 0.242 0.191
$x = $hash['v'] 0.196 0.145 $x = $hash['v'] 0.196 0.145
$x = $str[0] 0.146 0.094 $x = $str[0] 0.145 0.094
$x = $a ?: null 0.144 0.093 $x = $a ?: null 0.144 0.093
$x = $f ?: tmp 0.174 0.123 $x = $f ?: tmp 0.174 0.123
$x = $f ? $f : $a 0.153 0.101 $x = $f ? $f : $a 0.153 0.101
$x = $f ? $f : tmp 0.148 0.097 $x = $f ? $f : tmp 0.148 0.097
------------------------ ------------------------
Total 6.143 Total 6.143
return;
overflow: ZEND_ATTRIBUTE_COLD_LABEL
ZVAL_DOUBLE(result, (double) Z_LVAL_P(op1) + (double) Z_LVAL_P(op2));
+#elif defined(HAVE_ASM_GOTO) && defined(__aarch64__)
+ __asm__ goto(
+ "ldr x5, [%1]\n\t"
+ "ldr x6, [%2]\n\t"
+ "adds x5, x5, x6\n\t"
+ "bvs %l5\n\t"
+ "mov w6, %3\n\t"
+ "str x5, [%0]\n\t"
+ "str w6, [%0, %c4]\n"
+ :
+ : "r"(&result->value),
+ "r"(&op1->value),
+ "r"(&op2->value),
+ "n"(IS_LONG),
+ "n"(ZVAL_OFFSETOF_TYPE)
+ : "x5", "x6", "cc", "memory"
+ : overflow);
+ return;
+overflow: ZEND_ATTRIBUTE_COLD_LABEL
+ ZVAL_DOUBLE(result, (double) Z_LVAL_P(op1) + (double) Z_LVAL_P(op2));
#elif PHP_HAVE_BUILTIN_SADDL_OVERFLOW && SIZEOF_LONG == SIZEOF_ZEND_LONG
long lresult;
if (UNEXPECTED(__builtin_saddl_overflow(Z_LVAL_P(op1), Z_LVAL_P(op2), &lresult))) {
return;
overflow: ZEND_ATTRIBUTE_COLD_LABEL
ZVAL_DOUBLE(result, (double) Z_LVAL_P(op1) - (double) Z_LVAL_P(op2));
+#elif defined(HAVE_ASM_GOTO) && defined(__aarch64__)
+ __asm__ goto(
+ "ldr x5, [%1]\n\t"
+ "ldr x6, [%2]\n\t"
+ "subs x5, x5, x6\n\t"
+ "bvs %l5\n\t"
+ "mov w6, %3\n\t"
+ "str x5, [%0]\n\t"
+ "str w6, [%0, %c4]\n"
+ :
+ : "r"(&result->value),
+ "r"(&op1->value),
+ "r"(&op2->value),
+ "n"(IS_LONG),
+ "n"(ZVAL_OFFSETOF_TYPE)
+ : "x5", "x6", "cc", "memory"
+ : overflow);
+ return;
+overflow: ZEND_ATTRIBUTE_COLD_LABEL
+ ZVAL_DOUBLE(result, (double) Z_LVAL_P(op1) - (double) Z_LVAL_P(op2));
#elif PHP_HAVE_BUILTIN_SSUBL_OVERFLOW && SIZEOF_LONG == SIZEOF_ZEND_LONG
long lresult;
if (UNEXPECTED(__builtin_ssubl_overflow(Z_LVAL_P(op1), Z_LVAL_P(op2), &lresult))) {
AC_CACHE_CHECK([for asm goto], ac_cv__asm_goto,
[AC_RUN_IFELSE([AC_LANG_SOURCE([[
int main(void) {
+#if defined(__x86_64__) || defined(__i386__)
__asm__ goto("jmp %l0\n" :::: end);
+#elif defined(__aarch64__)
+ __asm__ goto("b %l0\n" :::: end);
+#endif
end:
return 0;
}