From 305199a13163e246b909ef32880df032823bff05 Mon Sep 17 00:00:00 2001
From: Anton Blanchard
Date: Tue, 26 May 2015 22:42:13 +1000
Subject: [PATCH] Improve performance of PowerPC64 ZEND_SIGNED_MULTIPLY_LONG

Detecting overflow with the XER is slow, partially because we have to
clear it before use. We can do better by using a trick where we compare
the high 64 bits of the result with the low 64 bits shifted right 63
bits. This is 7% faster on a POWER8 running a simple testcase:
---
 Zend/zend_multiply.h | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/Zend/zend_multiply.h b/Zend/zend_multiply.h
index b12c1e23b3..fc053d0e35 100644
--- a/Zend/zend_multiply.h
+++ b/Zend/zend_multiply.h
@@ -74,20 +74,19 @@
 
 #elif defined(__powerpc64__) && defined(__GNUC__)
 
-#define ZEND_SIGNED_MULTIPLY_LONG(a, b, lval, dval, usedval) do { \
-	long __tmpvar; \
-	__asm__("li 14, 0\n\t" \
-		"mtxer 14\n\t" \
-		"mulldo. %0, %2,%3\n\t" \
-		"xor %1, %1, %1\n\t" \
-		"bns+ 0f\n\t" \
-		"li %1, 1\n\t" \
-		"0:\n" \
-		: "=r"(__tmpvar),"=r"(usedval) \
-		: "r"(a), "r"(b) \
-		: "r14", "cc"); \
-	if (usedval) (dval) = (double) (a) * (double) (b); \
-	else (lval) = __tmpvar; \
+#define ZEND_SIGNED_MULTIPLY_LONG(a, b, lval, dval, usedval) do { \
+	long __low, __high; \
+	__asm__("mulld %0,%2,%3\n\t" \
+		"mulhd %1,%2,%3\n" \
+		: "=&r"(__low), "=&r"(__high) \
+		: "r"(a), "r"(b)); \
+	if ((__low >> 63) != __high) { \
+		(dval) = (double) (a) * (double) (b); \
+		(usedval) = 1; \
+	} else { \
+		(lval) = __low; \
+		(usedval) = 0; \
+	} \
 } while (0)
 
 #elif SIZEOF_ZEND_LONG == 4
-- 
2.40.0