From: Sanjay Patel
Date: Mon, 17 Dec 2018 20:27:43 +0000 (+0000)
Subject: [InstCombine] don't widen an arbitrary sequence of vector ops (PR40032)
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=e23763e5beb9ea8ff571a9665a10477509a5620d;p=llvm

[InstCombine] don't widen an arbitrary sequence of vector ops (PR40032)

The problem is shown specifically for a case with vector multiply here:
https://bugs.llvm.org/show_bug.cgi?id=40032
...and this might mask the original backend bug for ARM shown in:
https://bugs.llvm.org/show_bug.cgi?id=39967

As the test diffs here show, we were not (and probably still are not) doing
these kinds of transforms in a principled way. In some cases we still produce
as many or more wide instructions than we started with, so we still need to
restrict or correct other transforms that overstep.

If there are perf regressions from this change, we can either carve out
exceptions to the general IR rules, or improve the backend to do these
transforms when we know they are profitable. That's probably similar to a
change like D55448.

Differential Revision: https://reviews.llvm.org/D55744

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@349389 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 582f69fb2d2..7a8c762d494 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1107,12 +1107,9 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
   Value *Src = CI.getOperand(0);
   Type *SrcTy = Src->getType(), *DestTy = CI.getType();
 
-  // Attempt to extend the entire input expression tree to the destination
-  // type. Only do this if the dest type is a simple type, don't convert the
-  // expression tree to something weird like i93 unless the source is also
-  // strange.
+  // Try to extend the entire expression tree to the wide destination type.
   unsigned BitsToClear;
-  if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
+  if (shouldChangeType(SrcTy, DestTy) &&
       canEvaluateZExtd(Src, DestTy, BitsToClear, *this, &CI)) {
     assert(BitsToClear <= SrcTy->getScalarSizeInBits() &&
            "Can't clear more bits than in SrcTy");
@@ -1389,12 +1386,8 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
     return replaceInstUsesWith(CI, ZExt);
   }
 
-  // Attempt to extend the entire input expression tree to the destination
-  // type. Only do this if the dest type is a simple type, don't convert the
-  // expression tree to something weird like i93 unless the source is also
-  // strange.
-  if ((DestTy->isVectorTy() || shouldChangeType(SrcTy, DestTy)) &&
-      canEvaluateSExtd(Src, DestTy)) {
+  // Try to extend the entire expression tree to the wide destination type.
+  if (shouldChangeType(SrcTy, DestTy) && canEvaluateSExtd(Src, DestTy)) {
     // Okay, we can transform this! Insert the new expression now.
     LLVM_DEBUG(
         dbgs() << "ICE: EvaluateInDifferentType converting expression type"
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 7c9dbcb66cc..be7d43bbcf2 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -183,7 +183,10 @@ bool InstCombiner::shouldChangeType(unsigned FromWidth,
 /// a fundamental type in IR, and there are many specialized optimizations for
 /// i1 types.
bool InstCombiner::shouldChangeType(Type *From, Type *To) const { - assert(From->isIntegerTy() && To->isIntegerTy()); + // TODO: This could be extended to allow vectors. Datalayout changes might be + // needed to properly support that. + if (!From->isIntegerTy() || !To->isIntegerTy()) + return false; unsigned FromWidth = From->getPrimitiveSizeInBits(); unsigned ToWidth = To->getPrimitiveSizeInBits(); diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll index 43dc2fc4be1..b6d1eda0601 100644 --- a/test/Transforms/InstCombine/cast.ll +++ b/test/Transforms/InstCombine/cast.ll @@ -592,9 +592,11 @@ define i64 @test46(i64 %A) { define <2 x i64> @test46vec(<2 x i64> %A) { ; CHECK-LABEL: @test46vec( -; CHECK-NEXT: [[C:%.*]] = shl <2 x i64> [[A:%.*]], -; CHECK-NEXT: [[D:%.*]] = and <2 x i64> [[C]], -; CHECK-NEXT: ret <2 x i64> [[D]] +; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> +; CHECK-NEXT: [[C:%.*]] = shl <2 x i32> [[B]], +; CHECK-NEXT: [[D:%.*]] = and <2 x i32> [[C]], +; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[D]] to <2 x i64> +; CHECK-NEXT: ret <2 x i64> [[E]] ; %B = trunc <2 x i64> %A to <2 x i32> %C = and <2 x i32> %B, @@ -749,9 +751,9 @@ define i64 @test56(i16 %A) { define <2 x i64> @test56vec(<2 x i16> %A) { ; CHECK-LABEL: @test56vec( -; CHECK-NEXT: [[P353:%.*]] = sext <2 x i16> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[P354:%.*]] = lshr <2 x i64> [[P353]], -; CHECK-NEXT: [[P355:%.*]] = and <2 x i64> [[P354]], +; CHECK-NEXT: [[P353:%.*]] = sext <2 x i16> [[A:%.*]] to <2 x i32> +; CHECK-NEXT: [[P354:%.*]] = lshr <2 x i32> [[P353]], +; CHECK-NEXT: [[P355:%.*]] = zext <2 x i32> [[P354]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[P355]] ; %p353 = sext <2 x i16> %A to <2 x i32> @@ -774,8 +776,9 @@ define i64 @test57(i64 %A) { define <2 x i64> @test57vec(<2 x i64> %A) { ; CHECK-LABEL: @test57vec( -; CHECK-NEXT: [[C:%.*]] = lshr <2 x i64> [[A:%.*]], -; CHECK-NEXT: [[E:%.*]] = and <2 x i64> [[C]], +; CHECK-NEXT: [[B:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> +; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], +; CHECK-NEXT: [[E:%.*]] = zext <2 x i32> [[C]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[E]] ; %B = trunc <2 x i64> %A to <2 x i32> diff --git a/test/Transforms/InstCombine/select-bitext.ll b/test/Transforms/InstCombine/select-bitext.ll index 07d34b49b0b..d44be273573 100644 --- a/test/Transforms/InstCombine/select-bitext.ll +++ b/test/Transforms/InstCombine/select-bitext.ll @@ -98,9 +98,8 @@ define i64 @trunc_sel_larger_sext(i32 %a, i1 %cmp) { define <2 x i64> @trunc_sel_larger_sext_vec(<2 x i32> %a, <2 x i1> %cmp) { ; CHECK-LABEL: @trunc_sel_larger_sext_vec( -; CHECK-NEXT: [[TRUNC:%.*]] = zext <2 x i32> [[A:%.*]] to <2 x i64> -; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i64> [[TRUNC]], -; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <2 x i64> [[SEXT]], +; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i32> [[A:%.*]] to <2 x i16> +; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i16> [[TRUNC]] to <2 x i64> ; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i64> [[TMP1]], <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[EXT]] ; @@ -125,9 +124,8 @@ define i32 @trunc_sel_smaller_sext(i64 %a, i1 %cmp) { define <2 x i32> @trunc_sel_smaller_sext_vec(<2 x i64> %a, <2 x i1> %cmp) { ; CHECK-LABEL: @trunc_sel_smaller_sext_vec( -; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i32> [[TRUNC]], -; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <2 x i32> [[SEXT]], +; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x 
i16> +; CHECK-NEXT: [[TMP1:%.*]] = sext <2 x i16> [[TRUNC]] to <2 x i32> ; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i32> [[TMP1]], <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[EXT]] ; @@ -152,9 +150,9 @@ define i32 @trunc_sel_equal_sext(i32 %a, i1 %cmp) { define <2 x i32> @trunc_sel_equal_sext_vec(<2 x i32> %a, <2 x i1> %cmp) { ; CHECK-LABEL: @trunc_sel_equal_sext_vec( -; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[TMP1:%.*]] = ashr exact <2 x i32> [[SEXT]], -; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i32> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = ashr exact <2 x i32> [[TMP1]], +; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i32> [[TMP2]], <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[EXT]] ; %trunc = trunc <2 x i32> %a to <2 x i16> @@ -178,9 +176,9 @@ define i64 @trunc_sel_larger_zext(i32 %a, i1 %cmp) { define <2 x i64> @trunc_sel_larger_zext_vec(<2 x i32> %a, <2 x i1> %cmp) { ; CHECK-LABEL: @trunc_sel_larger_zext_vec( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64> -; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i64> [[TMP2]], <2 x i64> +; CHECK-NEXT: [[TRUNC_MASK:%.*]] = and <2 x i32> [[A:%.*]], +; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i32> [[TRUNC_MASK]] to <2 x i64> +; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i64> [[TMP1]], <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[EXT]] ; %trunc = trunc <2 x i32> %a to <2 x i16> @@ -204,9 +202,9 @@ define i32 @trunc_sel_smaller_zext(i64 %a, i1 %cmp) { define <2 x i32> @trunc_sel_smaller_zext_vec(<2 x i64> %a, <2 x i1> %cmp) { ; CHECK-LABEL: @trunc_sel_smaller_zext_vec( -; CHECK-NEXT: [[TRUNC:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[TRUNC]], -; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i32> [[TMP1]], <2 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[A:%.*]] to <2 x i32> +; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i32> [[TMP1]], +; CHECK-NEXT: [[EXT:%.*]] = select <2 x i1> [[CMP:%.*]], <2 x i32> [[TMP2]], <2 x i32> ; CHECK-NEXT: ret <2 x i32> [[EXT]] ; %trunc = trunc <2 x i64> %a to <2 x i16> diff --git a/test/Transforms/InstCombine/vector-casts.ll b/test/Transforms/InstCombine/vector-casts.ll index d7b31f83602..d2acefc0fbf 100644 --- a/test/Transforms/InstCombine/vector-casts.ll +++ b/test/Transforms/InstCombine/vector-casts.ll @@ -163,8 +163,8 @@ define void @convert(<2 x i32>* %dst.addr, <2 x i64> %src) { define <2 x i65> @foo(<2 x i64> %t) { ; CHECK-LABEL: @foo( -; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i64> [[T:%.*]], -; CHECK-NEXT: [[B:%.*]] = zext <2 x i64> [[TMP1]] to <2 x i65> +; CHECK-NEXT: [[A_MASK:%.*]] = and <2 x i64> [[T:%.*]], +; CHECK-NEXT: [[B:%.*]] = zext <2 x i64> [[A_MASK]] to <2 x i65> ; CHECK-NEXT: ret <2 x i65> [[B]] ; %a = trunc <2 x i64> %t to <2 x i32> @@ -174,8 +174,8 @@ define <2 x i65> @foo(<2 x i64> %t) { define <2 x i64> @bar(<2 x i65> %t) { ; CHECK-LABEL: @bar( -; CHECK-NEXT: [[A:%.*]] = trunc <2 x i65> [[T:%.*]] to <2 x i64> -; CHECK-NEXT: [[B:%.*]] = and <2 x i64> [[A]], +; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i65> [[T:%.*]] to <2 x i64> +; CHECK-NEXT: [[B:%.*]] = and <2 x i64> [[TMP1]], ; CHECK-NEXT: ret <2 x i64> [[B]] ; %a = trunc <2 x i65> %t to <2 x i32> @@ -185,9 +185,8 @@ define <2 x i64> @bar(<2 x i65> %t) { define <2 x i64> @bars(<2 x i65> %t) { ; CHECK-LABEL: @bars( -; CHECK-NEXT: [[A:%.*]] = trunc <2 x i65> 
[[T:%.*]] to <2 x i64> -; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i64> [[A]], -; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[SEXT]], +; CHECK-NEXT: [[A:%.*]] = trunc <2 x i65> [[T:%.*]] to <2 x i32> +; CHECK-NEXT: [[B:%.*]] = sext <2 x i32> [[A]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[B]] ; %a = trunc <2 x i65> %t to <2 x i32> @@ -197,8 +196,8 @@ define <2 x i64> @bars(<2 x i65> %t) { define <2 x i64> @quxs(<2 x i64> %t) { ; CHECK-LABEL: @quxs( -; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i64> [[T:%.*]], -; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[SEXT]], +; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> [[T:%.*]], +; CHECK-NEXT: [[B:%.*]] = ashr exact <2 x i64> [[TMP1]], ; CHECK-NEXT: ret <2 x i64> [[B]] ; %a = trunc <2 x i64> %t to <2 x i32> @@ -384,9 +383,10 @@ define <3 x float> @fptrunc_inselt2(<3 x double> %x) { define <2 x i64> @sext_less_casting_with_wideop(<2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: @sext_less_casting_with_wideop( -; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i64> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i64> [[MUL]], -; CHECK-NEXT: [[R:%.*]] = ashr exact <2 x i64> [[SEXT]], +; CHECK-NEXT: [[XNARROW:%.*]] = trunc <2 x i64> [[X:%.*]] to <2 x i32> +; CHECK-NEXT: [[YNARROW:%.*]] = trunc <2 x i64> [[Y:%.*]] to <2 x i32> +; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[XNARROW]], [[YNARROW]] +; CHECK-NEXT: [[R:%.*]] = sext <2 x i32> [[MUL]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[R]] ; %xnarrow = trunc <2 x i64> %x to <2 x i32> @@ -398,8 +398,10 @@ define <2 x i64> @sext_less_casting_with_wideop(<2 x i64> %x, <2 x i64> %y) { define <2 x i64> @zext_less_casting_with_wideop(<2 x i64> %x, <2 x i64> %y) { ; CHECK-LABEL: @zext_less_casting_with_wideop( -; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i64> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = and <2 x i64> [[MUL]], +; CHECK-NEXT: [[XNARROW:%.*]] = trunc <2 x i64> [[X:%.*]] to <2 x i32> +; CHECK-NEXT: [[YNARROW:%.*]] = trunc <2 x i64> [[Y:%.*]] to <2 x i32> +; CHECK-NEXT: [[MUL:%.*]] = mul <2 x i32> [[XNARROW]], [[YNARROW]] +; CHECK-NEXT: [[R:%.*]] = zext <2 x i32> [[MUL]] to <2 x i64> ; CHECK-NEXT: ret <2 x i64> [[R]] ; %xnarrow = trunc <2 x i64> %x to <2 x i32>
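
For anyone who wants to reproduce the motivating case in isolation, below is a
self-contained version of the zext_less_casting_with_wideop test from
vector-casts.ll above, reconstructed from its CHECK lines (only the comments
are new). Running it through opt -instcombine -S before this patch widened the
multiply to <2 x i64> and added a masking 'and'; with this patch, the narrow
<2 x i32> multiply is kept so the backend can select it directly.

; Narrow vector multiply wrapped in trunc/zext (see vector-casts.ll above).
; Before this change, InstCombine rewrote this as a wide <2 x i64> multiply
; followed by an 'and' mask; with shouldChangeType() now rejecting vector
; types, the trunc/mul/zext sequence is left for the backend to handle.
define <2 x i64> @zext_less_casting_with_wideop(<2 x i64> %x, <2 x i64> %y) {
  %xnarrow = trunc <2 x i64> %x to <2 x i32>
  %ynarrow = trunc <2 x i64> %y to <2 x i32>
  %mul = mul <2 x i32> %xnarrow, %ynarrow
  %r = zext <2 x i32> %mul to <2 x i64>
  ret <2 x i64> %r
}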