From: Bjorn Pettersson Date: Sat, 7 Sep 2019 12:16:23 +0000 (+0000) Subject: [CodeGen] Handle SMULFIXSAT with scale zero in TargetLowering::expandFixedPointMul X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=fb1c8990aef497a3b66cda83bb04ca13a41837c3;p=llvm [CodeGen] Handle SMULFIXSAT with scale zero in TargetLowering::expandFixedPointMul Summary: Normally TargetLowering::expandFixedPointMul would handle SMULFIXSAT with scale zero by using an SMULO to compute the product and determine if saturation is needed (if overflow happened). But if SMULO isn't custom/legal, it falls through and uses the same technique, using MULHS/SMUL_LOHI, as used for non-zero scales. The problem was that, when checking for overflow (handling saturation) without using MULO, we did not expect to find a zero scale. So we hit an assertion failure when doing APInt::getLowBitsSet(VTSize, Scale - 1). This patch fixes the problem by adding a new special case for how saturation is computed when scale is zero. Reviewers: RKSimon, bevinh, leonardchan, spatel Reviewed By: RKSimon Subscribers: wuzish, nemanjai, hiraditya, MaskRay, jsji, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D67071 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@371309 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index b42e272c3c1..d974c8cf3c7 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -6800,26 +6800,37 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const { } // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the - // widened multiplication) aren't all ones or all zeroes. We handled Scale==0 - // above so all the bits to examine is in Hi. + // widened multiplication) aren't all ones or all zeroes. 
+ + SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT); + SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT); + + if (Scale == 0) { + SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo, + DAG.getConstant(VTSize - 1, dl, ShiftTy)); + SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE); + // Saturated to SatMin if wide product is negative, and SatMax if wide + // product is positive ... + SDValue Zero = DAG.getConstant(0, dl, VT); + SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax, + ISD::SETLT); + // ... but only if we overflowed. + return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result); + } + + // We handled Scale==0 above so all the bits to examine is in Hi. // Saturate to max if ((Hi >> (Scale - 1)) > 0), // which is the same as if (Hi > (1 << (Scale - 1)) - 1) - APInt MaxVal = APInt::getSignedMaxValue(VTSize); SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1), dl, VT); - Result = DAG.getSelectCC(dl, Hi, LowMask, - DAG.getConstant(MaxVal, dl, VT), Result, - ISD::SETGT); + Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT); // Saturate to min if (Hi >> (Scale - 1)) < -1), // which is the same as if (HI < (-1 << (Scale - 1)) - APInt MinVal = APInt::getSignedMinValue(VTSize); SDValue HighMask = DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1), dl, VT); - Result = DAG.getSelectCC(dl, Hi, HighMask, - DAG.getConstant(MinVal, dl, VT), Result, - ISD::SETLT); + Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT); return Result; } diff --git a/test/CodeGen/PowerPC/smulfixsat.ll b/test/CodeGen/PowerPC/smulfixsat.ll new file mode 100644 index 00000000000..cbba6f8f897 --- /dev/null +++ b/test/CodeGen/PowerPC/smulfixsat.ll @@ -0,0 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=ppc32 | FileCheck %s + +declare i32 @llvm.smul.fix.sat.i32 (i32, 
i32, i32) + +define i32 @func1(i32 %x, i32 %y) nounwind { +; CHECK-LABEL: func1: +; CHECK: # %bb.0: +; CHECK-NEXT: lis 5, 32767 +; CHECK-NEXT: mulhw. 6, 3, 4 +; CHECK-NEXT: lis 7, -32768 +; CHECK-NEXT: mullw 3, 3, 4 +; CHECK-NEXT: ori 4, 5, 65535 +; CHECK-NEXT: srawi 5, 3, 31 +; CHECK-NEXT: cmplw 1, 6, 5 +; CHECK-NEXT: bc 12, 0, .LBB0_1 +; CHECK-NEXT: b .LBB0_2 +; CHECK-NEXT: .LBB0_1: +; CHECK-NEXT: addi 4, 7, 0 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT: bclr 12, 6, 0 +; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: ori 3, 4, 0 +; CHECK-NEXT: blr + %tmp = call i32 @llvm.smul.fix.sat.i32(i32 %x, i32 %y, i32 0) + ret i32 %tmp +} + +define i32 @func2(i32 %x, i32 %y) nounwind { +; CHECK-LABEL: func2: +; CHECK: # %bb.0: +; CHECK-NEXT: mulhw. 6, 3, 4 +; CHECK-NEXT: lis 5, 32767 +; CHECK-NEXT: mullw 3, 3, 4 +; CHECK-NEXT: rotlwi 3, 3, 31 +; CHECK-NEXT: ori 4, 5, 65535 +; CHECK-NEXT: rlwimi 3, 6, 31, 0, 0 +; CHECK-NEXT: bc 12, 1, .LBB1_1 +; CHECK-NEXT: b .LBB1_2 +; CHECK-NEXT: .LBB1_1: +; CHECK-NEXT: addi 3, 4, 0 +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: cmpwi 6, -1 +; CHECK-NEXT: lis 4, -32768 +; CHECK-NEXT: bc 12, 0, .LBB1_3 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB1_3: +; CHECK-NEXT: addi 3, 4, 0 +; CHECK-NEXT: blr + %tmp = call i32 @llvm.smul.fix.sat.i32(i32 %x, i32 %y, i32 1) + ret i32 %tmp +}