From 2f0fa8be4dbd28cec092291e2ff3f53b7ea90da4 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Tue, 6 Aug 2019 20:32:07 +0000 Subject: [PATCH] Revert [InstCombine] Shift amount reassociation: shl-trunc-shl pattern This reverts r368059 (git commit 0f957109761913c563922f1afd4ceb29ef21bbd0) This caused Clang to assert while self-hosting and compiling SystemZInstrInfo.cpp. Reduction is running. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@368084 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/PatternMatch.h | 6 -- .../InstCombine/InstCombineShifts.cpp | 90 +++++-------------- ...mount-reassociation-with-truncation-shl.ll | 61 +++++++++---- 3 files changed, 68 insertions(+), 89 deletions(-) diff --git a/include/llvm/IR/PatternMatch.h b/include/llvm/IR/PatternMatch.h index 62cc59a5777..2a9c3dd8c20 100644 --- a/include/llvm/IR/PatternMatch.h +++ b/include/llvm/IR/PatternMatch.h @@ -1270,12 +1270,6 @@ inline CastClass_match m_ZExt(const OpTy &Op) { return CastClass_match(Op); } -template -inline match_combine_or, OpTy> -m_ZExtOrSelf(const OpTy &Op) { - return m_CombineOr(m_ZExt(Op), Op); -} - template inline match_combine_or, CastClass_match> diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index b822761422c..c0a1df6b9a7 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -27,84 +27,42 @@ using namespace PatternMatch; // This is valid for any shift, but they must be identical. static Instruction * reassociateShiftAmtsOfTwoSameDirectionShifts(BinaryOperator *Sh0, - const SimplifyQuery &SQ, - InstCombiner::BuilderTy &Builder) { - // Look for a shift of some instruction, ignore zext of shift amount if any. - Instruction *Sh0Op0; - Value *ShAmt0; - if (!match(Sh0, - m_Shift(m_Instruction(Sh0Op0), m_ZExtOrSelf(m_Value(ShAmt0))))) - return nullptr; - - // If there is a truncation between the two shifts, we must make note of it - // and look through it. The truncation imposes additional constraints on the - // transform. + const SimplifyQuery &SQ) { + // Look for: (x shiftopcode ShAmt0) shiftopcode ShAmt1 + Value *X, *ShAmt1, *ShAmt0; Instruction *Sh1; - Value *Trunc = nullptr; - match(Sh0Op0, - m_CombineOr(m_CombineAnd(m_Trunc(m_Instruction(Sh1)), m_Value(Trunc)), - m_Instruction(Sh1))); - - // Inner shift: (x shiftopcode ShAmt1) - Value *X, *ShAmt1; - if (!match(Sh1, m_Shift(m_Value(X), m_ZExtOrSelf(m_Value(ShAmt1))))) + if (!match(Sh0, m_Shift(m_CombineAnd(m_Shift(m_Value(X), m_Value(ShAmt1)), + m_Instruction(Sh1)), + m_Value(ShAmt0)))) return nullptr; // The shift opcodes must be identical. Instruction::BinaryOps ShiftOpcode = Sh0->getOpcode(); if (ShiftOpcode != Sh1->getOpcode()) return nullptr; - - // Did we match a pattern with truncation ? - if (Trunc) { - // For right-shifts we can't do any such simplifications. Leave as-is. - if (ShiftOpcode != Instruction::BinaryOps::Shl) - return nullptr; // FIXME: still could perform constant-folding. - // If we saw truncation, we'll need to produce extra instruction, - // and for that one of the operands of the shift must be one-use. - if (!match(Sh0, m_c_BinOp(m_OneUse(m_Value()), m_Value()))) - return nullptr; - } - // Can we fold (ShAmt0+ShAmt1) ? 
- auto *NewShAmt = dyn_cast_or_null( - SimplifyAddInst(ShAmt0, ShAmt1, /*isNSW=*/false, /*isNUW=*/false, - SQ.getWithInstruction(Sh0))); + Value *NewShAmt = SimplifyBinOp(Instruction::BinaryOps::Add, ShAmt0, ShAmt1, + SQ.getWithInstruction(Sh0)); if (!NewShAmt) return nullptr; // Did not simplify. - // Is the new shift amount smaller than the bit width of inner shift? - if (!match(NewShAmt, m_SpecificInt_ICMP( - ICmpInst::Predicate::ICMP_ULT, - APInt(NewShAmt->getType()->getScalarSizeInBits(), - X->getType()->getScalarSizeInBits())))) - return nullptr; // FIXME: could perform constant-folding. - + // Is the new shift amount smaller than the bit width? + // FIXME: could also rely on ConstantRange. + unsigned BitWidth = X->getType()->getScalarSizeInBits(); + if (!match(NewShAmt, m_SpecificInt_ICMP(ICmpInst::Predicate::ICMP_ULT, + APInt(BitWidth, BitWidth)))) + return nullptr; // All good, we can do this fold. - NewShAmt = ConstantExpr::getZExtOrBitCast(NewShAmt, X->getType()); - BinaryOperator *NewShift = BinaryOperator::Create(ShiftOpcode, X, NewShAmt); - - // The flags can only be propagated if there wasn't a trunc. - if (!Trunc) { - // If the pattern did not involve trunc, and both of the original shifts - // had the same flag set, preserve the flag. - if (ShiftOpcode == Instruction::BinaryOps::Shl) { - NewShift->setHasNoUnsignedWrap(Sh0->hasNoUnsignedWrap() && - Sh1->hasNoUnsignedWrap()); - NewShift->setHasNoSignedWrap(Sh0->hasNoSignedWrap() && - Sh1->hasNoSignedWrap()); - } else { - NewShift->setIsExact(Sh0->isExact() && Sh1->isExact()); - } + // If both of the original shifts had the same flag set, preserve the flag. + if (ShiftOpcode == Instruction::BinaryOps::Shl) { + NewShift->setHasNoUnsignedWrap(Sh0->hasNoUnsignedWrap() && + Sh1->hasNoUnsignedWrap()); + NewShift->setHasNoSignedWrap(Sh0->hasNoSignedWrap() && + Sh1->hasNoSignedWrap()); + } else { + NewShift->setIsExact(Sh0->isExact() && Sh1->isExact()); } - - Instruction *Ret = NewShift; - if (Trunc) { - Builder.Insert(NewShift); - Ret = CastInst::Create(Instruction::Trunc, NewShift, Sh0->getType()); - } - - return Ret; + return NewShift; } // If we have some pattern that leaves only some low bits set, and then performs @@ -200,7 +158,7 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) { return Res; if (Instruction *NewShift = - reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ, Builder)) + reassociateShiftAmtsOfTwoSameDirectionShifts(&I, SQ)) return NewShift; // (C1 shift (A add C2)) -> (C1 shift C2) shift A) diff --git a/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll b/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll index 3c63fa7aada..0f5965ba59e 100644 --- a/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll +++ b/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll @@ -12,8 +12,12 @@ define i16 @t0(i32 %x, i16 %y) { ; CHECK-LABEL: @t0( -; CHECK-NEXT: [[X_TR:%.*]] = trunc i32 [[X:%.*]] to i16 -; CHECK-NEXT: [[T5:%.*]] = shl i16 [[X_TR]], 8 +; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 +; CHECK-NEXT: [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 +; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -24 +; CHECK-NEXT: [[T5:%.*]] = shl i16 [[T3]], [[T4]] ; CHECK-NEXT: ret i16 [[T5]] ; %t0 = sub i16 32, %y @@ -27,8 +31,12 @@ define i16 @t0(i32 %x, i16 %y) { define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> 
%y) { ; CHECK-LABEL: @t1_vec_splat( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[T5:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16> +; CHECK-NEXT: [[T0:%.*]] = sub <2 x i16> , [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext <2 x i16> [[T0]] to <2 x i32> +; CHECK-NEXT: [[T2:%.*]] = shl <2 x i32> [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc <2 x i32> [[T2]] to <2 x i16> +; CHECK-NEXT: [[T4:%.*]] = add <2 x i16> [[Y]], +; CHECK-NEXT: [[T5:%.*]] = shl <2 x i16> [[T3]], [[T4]] ; CHECK-NEXT: ret <2 x i16> [[T5]] ; %t0 = sub <2 x i16> , %y @@ -42,8 +50,12 @@ define <2 x i16> @t1_vec_splat(<2 x i32> %x, <2 x i16> %y) { define <2 x i16> @t2_vec_nonsplat(<2 x i32> %x, <2 x i16> %y) { ; CHECK-LABEL: @t2_vec_nonsplat( -; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[T5:%.*]] = trunc <2 x i32> [[TMP1]] to <2 x i16> +; CHECK-NEXT: [[T0:%.*]] = sub <2 x i16> , [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext <2 x i16> [[T0]] to <2 x i32> +; CHECK-NEXT: [[T2:%.*]] = shl <2 x i32> [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc <2 x i32> [[T2]] to <2 x i16> +; CHECK-NEXT: [[T4:%.*]] = add <2 x i16> [[Y]], +; CHECK-NEXT: [[T5:%.*]] = shl <2 x i16> [[T3]], [[T4]] ; CHECK-NEXT: ret <2 x i16> [[T5]] ; %t0 = sub <2 x i16> , %y @@ -59,8 +71,12 @@ define <2 x i16> @t2_vec_nonsplat(<2 x i32> %x, <2 x i16> %y) { define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) { ; CHECK-LABEL: @t3_vec_nonsplat_undef0( -; CHECK-NEXT: [[TMP1:%.*]] = shl <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16> +; CHECK-NEXT: [[T0:%.*]] = sub <3 x i16> , [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32> +; CHECK-NEXT: [[T2:%.*]] = shl <3 x i32> [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16> +; CHECK-NEXT: [[T4:%.*]] = add <3 x i16> [[Y]], +; CHECK-NEXT: [[T5:%.*]] = shl <3 x i16> [[T3]], [[T4]] ; CHECK-NEXT: ret <3 x i16> [[T5]] ; %t0 = sub <3 x i16> , %y @@ -74,8 +90,12 @@ define <3 x i16> @t3_vec_nonsplat_undef0(<3 x i32> %x, <3 x i16> %y) { define <3 x i16> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) { ; CHECK-LABEL: @t4_vec_nonsplat_undef1( -; CHECK-NEXT: [[TMP1:%.*]] = shl <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16> +; CHECK-NEXT: [[T0:%.*]] = sub <3 x i16> , [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32> +; CHECK-NEXT: [[T2:%.*]] = shl <3 x i32> [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16> +; CHECK-NEXT: [[T4:%.*]] = add <3 x i16> [[Y]], +; CHECK-NEXT: [[T5:%.*]] = shl <3 x i16> [[T3]], [[T4]] ; CHECK-NEXT: ret <3 x i16> [[T5]] ; %t0 = sub <3 x i16> , %y @@ -89,8 +109,12 @@ define <3 x i16> @t4_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) { define <3 x i16> @t5_vec_nonsplat_undef1(<3 x i32> %x, <3 x i16> %y) { ; CHECK-LABEL: @t5_vec_nonsplat_undef1( -; CHECK-NEXT: [[TMP1:%.*]] = shl <3 x i32> [[X:%.*]], -; CHECK-NEXT: [[T5:%.*]] = trunc <3 x i32> [[TMP1]] to <3 x i16> +; CHECK-NEXT: [[T0:%.*]] = sub <3 x i16> , [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext <3 x i16> [[T0]] to <3 x i32> +; CHECK-NEXT: [[T2:%.*]] = shl <3 x i32> [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc <3 x i32> [[T2]] to <3 x i16> +; CHECK-NEXT: [[T4:%.*]] = add <3 x i16> [[Y]], +; CHECK-NEXT: [[T5:%.*]] = shl <3 x i16> [[T3]], [[T4]] ; CHECK-NEXT: ret <3 x i16> [[T5]] ; %t0 = sub <3 x i16> , %y @@ -113,9 +137,9 @@ define i16 @t6_extrause0(i32 %x, i16 %y) { ; CHECK-NEXT: [[T1:%.*]] = zext 
i16 [[T0]] to i32 ; CHECK-NEXT: [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]] ; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 +; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -24 ; CHECK-NEXT: call void @use16(i16 [[T3]]) -; CHECK-NEXT: [[X_TR:%.*]] = trunc i32 [[X]] to i16 -; CHECK-NEXT: [[T5:%.*]] = shl i16 [[X_TR]], 8 +; CHECK-NEXT: [[T5:%.*]] = shl i16 [[T3]], [[T4]] ; CHECK-NEXT: ret i16 [[T5]] ; %t0 = sub i16 32, %y @@ -130,10 +154,13 @@ define i16 @t6_extrause0(i32 %x, i16 %y) { define i16 @t7_extrause1(i32 %x, i16 %y) { ; CHECK-LABEL: @t7_extrause1( -; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y:%.*]], -24 +; CHECK-NEXT: [[T0:%.*]] = sub i16 32, [[Y:%.*]] +; CHECK-NEXT: [[T1:%.*]] = zext i16 [[T0]] to i32 +; CHECK-NEXT: [[T2:%.*]] = shl i32 [[X:%.*]], [[T1]] +; CHECK-NEXT: [[T3:%.*]] = trunc i32 [[T2]] to i16 +; CHECK-NEXT: [[T4:%.*]] = add i16 [[Y]], -24 ; CHECK-NEXT: call void @use16(i16 [[T4]]) -; CHECK-NEXT: [[X_TR:%.*]] = trunc i32 [[X:%.*]] to i16 -; CHECK-NEXT: [[T5:%.*]] = shl i16 [[X_TR]], 8 +; CHECK-NEXT: [[T5:%.*]] = shl i16 [[T3]], [[T4]] ; CHECK-NEXT: ret i16 [[T5]] ; %t0 = sub i16 32, %y -- 2.50.1
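For reference, the fold being reverted above rewrites a left shift done in a wide type, truncated, and then shifted left again in the narrow type into a single wide shift followed by one truncation, whenever the summed shift amount is known to be smaller than the wide type's bit width (the m_SpecificInt_ICMP guard in the removed code). A minimal standalone check of the underlying identity, stated on i32/i16 like the tests in this patch; the function names and test values below are made up for this sketch and are not part of the patch:

  #include <cassert>
  #include <cstdint>

  // Two-shift form:     (uint16_t)(x << a) << b     (wide shl, trunc, narrow shl)
  // Reassociated form:  (uint16_t)(x << (a + b))    (single wide shl, then trunc)
  // They agree whenever a + b < 32 and b < 16.
  static uint16_t twoShifts(uint32_t x, unsigned a, unsigned b) {
    uint16_t t = static_cast<uint16_t>(x << a); // shl i32, then trunc to i16
    return static_cast<uint16_t>(t << b);       // shl i16
  }

  static uint16_t mergedShift(uint32_t x, unsigned a, unsigned b) {
    return static_cast<uint16_t>(x << (a + b)); // shl i32 by (a + b), then trunc
  }

  int main() {
    const uint32_t xs[] = {0u, 1u, 0x00010001u, 0xDEADBEEFu, 0xFFFFFFFFu};
    for (uint32_t x : xs)
      for (unsigned a = 0; a < 32; ++a)
        for (unsigned b = 0; b < 16; ++b)
          if (a + b < 32) // the "new shift amount < bit width" guard
            assert(twoShifts(x, a, b) == mergedShift(x, a, b));
    return 0;
  }

Truncation only drops high bits, and a left shift never pulls dropped high bits back into the low half, which is why the narrowing can be hoisted past the shift.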
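The removed code also restricted the through-the-trunc variant to shl ("For right-shifts we can't do any such simplifications"): for a right shift, the truncation discards exactly the bits that a merged wide shift would move into the narrow result. A small counterexample, again with illustrative names and constants chosen only for this note:

  #include <cstdint>
  #include <cstdio>

  int main() {
    // Bit 20 of x reaches the low 16 bits only on the merged-shift path;
    // the two-shift path truncates it away after the first lshr.
    uint32_t x = 0x00100000u;   // only bit 20 set
    unsigned a = 4, b = 4;      // both shifts valid even in i16 (a, b < 16)
    uint16_t narrowPath = static_cast<uint16_t>(static_cast<uint16_t>(x >> a) >> b);
    uint16_t mergedPath = static_cast<uint16_t>(x >> (a + b));
    std::printf("two shifts: %#x, merged: %#x\n",
                (unsigned)narrowPath, (unsigned)mergedPath); // prints 0 vs 0x1000
    return 0;
  }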