From: Fangrui Song Date: Fri, 19 Apr 2019 02:06:06 +0000 (+0000) Subject: [APInt] Optimize umul_ov X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f238d270732214f8f87d086ce50b68e770c05e47;p=llvm [APInt] Optimize umul_ov Change two costly udiv() calls to lshr(1)*RHS + left-shift + plus On one 64-bit umul_ov benchmark, I measured an obvious improvement: 12.8129s -> 3.6257s Note, there may be some value to special case 64-bit (the most common case) with __builtin_umulll_overflow(). Differential Revision: https://reviews.llvm.org/D60669 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@358730 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 5ed176f1b62..9c59d93b737 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -1914,12 +1914,19 @@ APInt APInt::smul_ov(const APInt &RHS, bool &Overflow) const { } APInt APInt::umul_ov(const APInt &RHS, bool &Overflow) const { - APInt Res = *this * RHS; + if (countLeadingZeros() + RHS.countLeadingZeros() + 2 <= BitWidth) { + Overflow = true; + return *this * RHS; + } - if (*this != 0 && RHS != 0) - Overflow = Res.udiv(RHS) != *this || Res.udiv(*this) != RHS; - else - Overflow = false; + APInt Res = lshr(1) * RHS; + Overflow = Res.isNegative(); + Res <<= 1; + if ((*this)[0]) { + Res += RHS; + if (Res.ult(RHS)) + Overflow = true; + } return Res; } diff --git a/unittests/ADT/APIntTest.cpp b/unittests/ADT/APIntTest.cpp index 6ef5b25f8d4..a92a654ac17 100644 --- a/unittests/ADT/APIntTest.cpp +++ b/unittests/ADT/APIntTest.cpp @@ -2381,6 +2381,42 @@ TEST(APIntTest, RoundingSDiv) { } } +TEST(APIntTest, umul_ov) { + const std::pair<uint64_t, uint64_t> Overflows[] = { + {0x8000000000000000, 2}, + {0x5555555555555556, 3}, + {4294967296, 4294967296}, + {4294967295, 4294967298}, + }; + const std::pair<uint64_t, uint64_t> NonOverflows[] = { + {0x7fffffffffffffff, 2}, + {0x5555555555555555, 3}, + {4294967295, 4294967297}, + }; + + bool Overflow; + for (auto &X : Overflows) { + APInt A(64, 
X.first); + APInt B(64, X.second); + (void)A.umul_ov(B, Overflow); + EXPECT_TRUE(Overflow); + } + for (auto &X : NonOverflows) { + APInt A(64, X.first); + APInt B(64, X.second); + (void)A.umul_ov(B, Overflow); + EXPECT_FALSE(Overflow); + } + + for (unsigned Bits = 1; Bits <= 5; ++Bits) + for (unsigned A = 0; A != 1u << Bits; ++A) + for (unsigned B = 0; B != 1u << Bits; ++B) { + APInt C = APInt(Bits, A).umul_ov(APInt(Bits, B), Overflow); + APInt D = APInt(2 * Bits, A) * APInt(2 * Bits, B); + EXPECT_TRUE(D.getHiBits(Bits).isNullValue() != Overflow); + } +} + TEST(APIntTest, SolveQuadraticEquationWrap) { // Verify that "Solution" is the first non-negative integer that solves // Ax^2 + Bx + C = "0 or overflow", i.e. that it is a correct solution