From: Sanjay Patel
Date: Sun, 31 Mar 2019 15:01:30 +0000 (+0000)
Subject: [InstCombine] canonicalize select shuffles by commuting
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=3a12f8f3d29c6b93a3da6799e268880613ea5cde;p=llvm

[InstCombine] canonicalize select shuffles by commuting

In PR41304:
https://bugs.llvm.org/show_bug.cgi?id=41304
...we have a case where we want to fold a binop of select-shuffle (blended) values.

Rather than try to match commuted variants of the pattern, we can canonicalize the
shuffles and check for mask equality with commuted operands.

We don't produce arbitrary shuffle masks in instcombine, but select-shuffles are a
special case that the backend is required to handle because we already canonicalize
vector select to this shuffle form.

So there should be no codegen difference from this change. It's possible that this
improves CSE in IR though.

Differential Revision: https://reviews.llvm.org/D60016

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@357366 91177308-0d34-0410-b5e6-96231b3b80d8
---
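Illustrative note (not part of the original commit): a sketch of the canonicalization
on a small select shuffle, using hypothetical function names and mask values. Lane 0 of
this blend comes from the second operand (mask element 4 >= NumElts), so it is not in
the canonical form:

  define <4 x i32> @blend_not_canonical(<4 x i32> %a, <4 x i32> %b) {
    ; lanes chosen: b0, a1, b2, a3
    %s = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
    ret <4 x i32> %s
  }

With this patch, instcombine commutes the shuffle: the operands are swapped and each
defined mask element is remapped across the NumElts boundary (4 -> 0, 1 -> 5, 6 -> 2,
3 -> 7), so running opt -instcombine -S should leave the equivalent canonical form:

  define <4 x i32> @blend_canonical(<4 x i32> %a, <4 x i32> %b) {
    ; same blend of lanes b0, a1, b2, a3, but lane 0 now comes from operand 0
    %s = shufflevector <4 x i32> %b, <4 x i32> %a, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
    ret <4 x i32> %s
  }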
diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h
index a82cedab25d..91e04ea59c8 100644
--- a/include/llvm/IR/Instructions.h
+++ b/include/llvm/IR/Instructions.h
@@ -2043,6 +2043,10 @@ public:
     return User::operator new(s, 3);
   }
 
+  /// Swap the first 2 operands and adjust the mask to preserve the semantics
+  /// of the instruction.
+  void commute();
+
   /// Return true if a shufflevector instruction can be
   /// formed with the specified operands.
   static bool isValidOperands(const Value *V1, const Value *V2,
diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp
index 09d4395db3e..d65c61c891a 100644
--- a/lib/IR/Instructions.cpp
+++ b/lib/IR/Instructions.cpp
@@ -1750,6 +1750,25 @@ ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
   setName(Name);
 }
 
+void ShuffleVectorInst::commute() {
+  int NumOpElts = Op<0>()->getType()->getVectorNumElements();
+  int NumMaskElts = getMask()->getType()->getVectorNumElements();
+  SmallVector<Constant *, 16> NewMask(NumMaskElts);
+  Type *Int32Ty = Type::getInt32Ty(getContext());
+  for (int i = 0; i != NumMaskElts; ++i) {
+    int MaskElt = getMaskValue(i);
+    if (MaskElt == -1) {
+      NewMask[i] = UndefValue::get(Int32Ty);
+      continue;
+    }
+    assert(MaskElt >= 0 && MaskElt < 2 * NumOpElts && "Out-of-range mask");
+    MaskElt = (MaskElt < NumOpElts) ? MaskElt + NumOpElts : MaskElt - NumOpElts;
+    NewMask[i] = ConstantInt::get(Int32Ty, MaskElt);
+  }
+  Op<2>() = ConstantVector::get(NewMask);
+  Op<0>().swap(Op<1>());
+}
+
 bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
                                         const Value *Mask) {
   // V1 and V2 must be vectors of the same type.
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index cb5c22c69be..49c29fe651b 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1343,6 +1343,15 @@ static Instruction *foldSelectShuffle(ShuffleVectorInst &Shuf,
   if (!Shuf.isSelect())
     return nullptr;
 
+  // Canonicalize to choose from operand 0 first.
+  unsigned NumElts = Shuf.getType()->getVectorNumElements();
+  if (Shuf.getMaskValue(0) >= (int)NumElts) {
+    assert(!isa<UndefValue>(Shuf.getOperand(1)) &&
+           "Not expecting undef shuffle operand with select mask");
+    Shuf.commute();
+    return &Shuf;
+  }
+
   if (Instruction *I = foldSelectShuffleWith1Binop(Shuf))
     return I;
 
diff --git a/test/Transforms/InstCombine/X86/blend_x86.ll b/test/Transforms/InstCombine/X86/blend_x86.ll
index 676d8ba9e7d..864e2b9aa67 100644
--- a/test/Transforms/InstCombine/X86/blend_x86.ll
+++ b/test/Transforms/InstCombine/X86/blend_x86.ll
@@ -28,7 +28,7 @@ define <2 x double> @constant_blendvpd_dup(<2 x double> %xy, <2 x double> %sel)
 
 define <4 x float> @constant_blendvps(<4 x float> %xyzw, <4 x float> %abcd) {
 ; CHECK-LABEL: @constant_blendvps(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[ABCD:%.*]], <4 x float> [[XYZW:%.*]], <4 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[XYZW:%.*]], <4 x float> [[ABCD:%.*]], <4 x i32>
 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
 ;
   %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> )
@@ -53,7 +53,7 @@ define <4 x float> @constant_blendvps_dup(<4 x float> %xyzw, <4 x float> %sel) {
 
 define <16 x i8> @constant_pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd) {
 ; CHECK-LABEL: @constant_pblendvb(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[ABCD:%.*]], <16 x i8> [[XYZW:%.*]], <16 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[XYZW:%.*]], <16 x i8> [[ABCD:%.*]], <16 x i32>
 ; CHECK-NEXT: ret <16 x i8> [[TMP1]]
 ;
   %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> )
@@ -103,7 +103,7 @@ define <4 x double> @constant_blendvpd_avx_dup(<4 x double> %xy, <4 x double> %s
 
 define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
 ; CHECK-LABEL: @constant_blendvps_avx(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[ABCD:%.*]], <8 x float> [[XYZW:%.*]], <8 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[XYZW:%.*]], <8 x float> [[ABCD:%.*]], <8 x i32>
 ; CHECK-NEXT: ret <8 x float> [[TMP1]]
 ;
   %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> )
@@ -128,7 +128,7 @@ define <8 x float> @constant_blendvps_avx_dup(<8 x float> %xyzw, <8 x float> %se
 
 define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
 ; CHECK-LABEL: @constant_pblendvb_avx2(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[ABCD:%.*]], <32 x i8> [[XYZW:%.*]], <32 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[XYZW:%.*]], <32 x i8> [[ABCD:%.*]], <32 x i32>
 ; CHECK-NEXT: ret <32 x i8> [[TMP1]]
 ;
   %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd,
diff --git a/test/Transforms/InstCombine/X86/x86-insertps.ll b/test/Transforms/InstCombine/X86/x86-insertps.ll
index 4ec61d3ef33..54f00644f10 100644
--- a/test/Transforms/InstCombine/X86/x86-insertps.ll
+++ b/test/Transforms/InstCombine/X86/x86-insertps.ll
@@ -69,7 +69,7 @@ define <4 x float> @insertps_0xc1(<4 x float> %v1, <4 x float> %v2) {
 
 define <4 x float> @insertps_0x00(<4 x float> %v1, <4 x float> %v2) {
 ; CHECK-LABEL: @insertps_0x00(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V1:%.*]], <4 x float> [[V2:%.*]], <4 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V2:%.*]], <4 x float> [[V1:%.*]], <4 x i32>
 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
 ;
   %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 0)
diff --git a/test/Transforms/InstCombine/X86/x86-sse4a.ll b/test/Transforms/InstCombine/X86/x86-sse4a.ll
index 70ed3c63ca3..e33a382b7e1 100644
--- a/test/Transforms/InstCombine/X86/x86-sse4a.ll
+++ b/test/Transforms/InstCombine/X86/x86-sse4a.ll
@@ -203,7 +203,7 @@ define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) {
 
 define <16 x i8> @test_insertqi_shuffle_8123uuuu(<16 x i8> %v, <16 x i8> %i) {
 ; CHECK-LABEL: @test_insertqi_shuffle_8123uuuu(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[V:%.*]], <16 x i8> [[I:%.*]], <16 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[I:%.*]], <16 x i8> [[V:%.*]], <16 x i32>
 ; CHECK-NEXT: ret <16 x i8> [[TMP1]]
 ;
   %1 = bitcast <16 x i8> %v to <2 x i64>
diff --git a/test/Transforms/InstCombine/logical-select.ll b/test/Transforms/InstCombine/logical-select.ll
index b8decb07114..3f02554e7de 100644
--- a/test/Transforms/InstCombine/logical-select.ll
+++ b/test/Transforms/InstCombine/logical-select.ll
@@ -455,7 +455,7 @@ define <4 x i32> @vec_sel_consts(<4 x i32> %a, <4 x i32> %b) {
 
 define <3 x i129> @vec_sel_consts_weird(<3 x i129> %a, <3 x i129> %b) {
 ; CHECK-LABEL: @vec_sel_consts_weird(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i129> [[B:%.*]], <3 x i129> [[A:%.*]], <3 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i129> [[A:%.*]], <3 x i129> [[B:%.*]], <3 x i32>
 ; CHECK-NEXT: ret <3 x i129> [[TMP1]]
 ;
   %and1 = and <3 x i129> %a,
diff --git a/test/Transforms/InstCombine/phi-select-constant.ll b/test/Transforms/InstCombine/phi-select-constant.ll
index bc2f9ac202b..9d1c973925b 100644
--- a/test/Transforms/InstCombine/phi-select-constant.ll
+++ b/test/Transforms/InstCombine/phi-select-constant.ll
@@ -77,7 +77,7 @@ final:
 define <2 x i8> @vec3(i1 %cond1, i1 %cond2, <2 x i1> %x, <2 x i8> %y, <2 x i8> %z) {
 ; CHECK-LABEL: @vec3(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[PHITMP1:%.*]] = shufflevector <2 x i8> [[Y:%.*]], <2 x i8> [[Z:%.*]], <2 x i32>
+; CHECK-NEXT: [[PHITMP1:%.*]] = shufflevector <2 x i8> [[Z:%.*]], <2 x i8> [[Y:%.*]], <2 x i32>
 ; CHECK-NEXT: br i1 [[COND1:%.*]], label [[IF1:%.*]], label [[ELSE:%.*]]
 ; CHECK: if1:
 ; CHECK-NEXT: [[PHITMP2:%.*]] = shufflevector <2 x i8> [[Y]], <2 x i8> [[Z]], <2 x i32>
diff --git a/test/Transforms/InstCombine/shuffle_select.ll b/test/Transforms/InstCombine/shuffle_select.ll
index f370ad624e0..991f390682a 100644
--- a/test/Transforms/InstCombine/shuffle_select.ll
+++ b/test/Transforms/InstCombine/shuffle_select.ll
@@ -158,7 +158,7 @@ define <4 x i32> @lshr_exact_undef_mask_elt(<4 x i32> %v) {
 define <4 x i32> @lshr_constant_op1(<4 x i32> %v) {
 ; CHECK-LABEL: @lshr_constant_op1(
 ; CHECK-NEXT: [[B:%.*]] = lshr exact <4 x i32> , [[V:%.*]]
-; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32>
+; CHECK-NEXT: [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32>
 ; CHECK-NEXT: ret <4 x i32> [[S]]
 ;
   %b = lshr exact <4 x i32> , %v
@@ -977,7 +977,7 @@ define <4 x i32> @shl_2_vars_nsw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
 
 define <4 x i32> @lshr_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
 ; CHECK-LABEL: @lshr_2_vars(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32>
 ; CHECK-NEXT: [[T3:%.*]] = lshr <4 x i32> , [[TMP1]]
 ; CHECK-NEXT: ret <4 x i32> [[T3]]
 ;
@@ -989,7 +989,7 @@ define <4 x i32> @lshr_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
 
 define <4 x i32> @lshr_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
 ; CHECK-LABEL: @lshr_2_vars_exact(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32>
 ; CHECK-NEXT: [[T3:%.*]] = lshr exact <4 x i32> , [[TMP1]]
 ; CHECK-NEXT: ret <4 x i32> [[T3]]
 ;
@@ -1033,7 +1033,7 @@ define <4 x i32> @lshr_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1)
 
 define <3 x i32> @ashr_2_vars(<3 x i32> %v0, <3 x i32> %v1) {
 ; CHECK-LABEL: @ashr_2_vars(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[V0:%.*]], <3 x i32> [[V1:%.*]], <3 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <3 x i32> [[V1:%.*]], <3 x i32> [[V0:%.*]], <3 x i32>
 ; CHECK-NEXT: [[T3:%.*]] = ashr <3 x i32> [[TMP1]],
 ; CHECK-NEXT: ret <3 x i32> [[T3]]
 ;
@@ -1060,7 +1060,7 @@ define <3 x i42> @and_2_vars(<3 x i42> %v0, <3 x i42> %v1) {
 define <4 x i32> @or_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
 ; CHECK-LABEL: @or_2_vars(
 ; CHECK-NEXT: [[T1:%.*]] = or <4 x i32> [[V0:%.*]],
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[V1:%.*]], <4 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0]], <4 x i32>
 ; CHECK-NEXT: [[T3:%.*]] = or <4 x i32> [[TMP1]],
 ; CHECK-NEXT: call void @use_v4i32(<4 x i32> [[T1]])
 ; CHECK-NEXT: ret <4 x i32> [[T3]]
@@ -1095,7 +1095,7 @@ define <4 x i32> @xor_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
 
 define <4 x i32> @udiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
 ; CHECK-LABEL: @udiv_2_vars(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32>
 ; CHECK-NEXT: [[T3:%.*]] = udiv <4 x i32> , [[TMP1]]
 ; CHECK-NEXT: ret <4 x i32> [[T3]]
 ;
@@ -1107,7 +1107,7 @@ define <4 x i32> @udiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
 
 define <4 x i32> @udiv_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
 ; CHECK-LABEL: @udiv_2_vars_exact(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32>
 ; CHECK-NEXT: [[T3:%.*]] = udiv exact <4 x i32> , [[TMP1]]
 ; CHECK-NEXT: ret <4 x i32> [[T3]]
 ;
@@ -1340,7 +1340,7 @@ define <4 x i32> @shl_mul_not_constant_shift_amount(<4 x i32> %v0) {
 ; CHECK-LABEL: @shl_mul_not_constant_shift_amount(
 ; CHECK-NEXT: [[T1:%.*]] = shl <4 x i32> , [[V0:%.*]]
 ; CHECK-NEXT: [[T2:%.*]] = mul <4 x i32> [[V0]],
-; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32>
+; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i32> [[T2]], <4 x i32> [[T1]], <4 x i32>
 ; CHECK-NEXT: ret <4 x i32> [[T3]]
 ;
   %t1 = shl <4 x i32> , %v0
@@ -1353,7 +1353,7 @@ define <4 x i32> @shl_mul_not_constant_shift_amount(<4 x i32> %v0) {
 
 define <4 x i32> @mul_shl_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
 ; CHECK-LABEL: @mul_shl_2_vars(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32>
 ; CHECK-NEXT: [[T3:%.*]] = mul nuw <4 x i32> [[TMP1]],
 ; CHECK-NEXT: ret <4 x i32> [[T3]]
 ;
@@ -1365,7 +1365,7 @@ define <4 x i32> @mul_shl_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
 
 define <4 x i32> @shl_mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
 ; CHECK-LABEL: @shl_mul_2_vars(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32>
 ; CHECK-NEXT: [[T3:%.*]] = mul <4 x i32> [[TMP1]],
 ; CHECK-NEXT: ret <4 x i32> [[T3]]
 ;
@@ -1414,7 +1414,7 @@ define <4 x i8> @or_add_not_enough_masking(<4 x i8> %v) {
 ; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]],
 ; CHECK-NEXT: [[T1:%.*]] = or <4 x i8> [[V0]],
 ; CHECK-NEXT: [[T2:%.*]] = add <4 x i8> [[V0]],
-; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i8> [[T1]], <4 x i8> [[T2]], <4 x i32>
+; CHECK-NEXT: [[T3:%.*]] = shufflevector <4 x i8> [[T2]], <4 x i8> [[T1]], <4 x i32>
 ; CHECK-NEXT: ret <4 x i8> [[T3]]
 ;
   %v0 = lshr <4 x i8> %v, ; clear not enough top bits
@@ -1429,7 +1429,7 @@ define <4 x i8> @or_add_not_enough_masking(<4 x i8> %v) {
 define <4 x i32> @add_or_2_vars(<4 x i32> %v, <4 x i32> %v1) {
 ; CHECK-LABEL: @add_or_2_vars(
 ; CHECK-NEXT: [[V0:%.*]] = shl <4 x i32> [[V:%.*]],
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0]], <4 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[V1:%.*]], <4 x i32>
 ; CHECK-NEXT: [[T3:%.*]] = add <4 x i32> [[TMP1]],
 ; CHECK-NEXT: ret <4 x i32> [[T3]]
 ;
@@ -1443,8 +1443,8 @@ define <4 x i32> @add_or_2_vars(<4 x i32> %v, <4 x i32> %v1) {
 define <4 x i8> @or_add_2_vars(<4 x i8> %v, <4 x i8> %v1) {
 ; CHECK-LABEL: @or_add_2_vars(
 ; CHECK-NEXT: [[V0:%.*]] = lshr <4 x i8> [[V:%.*]],
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[V0]], <4 x i8> [[V1:%.*]], <4 x i32>
-; CHECK-NEXT: [[T3:%.*]] = add <4 x i8> [[TMP1]],
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[V1:%.*]], <4 x i8> [[V0]], <4 x i32>
+; CHECK-NEXT: [[T3:%.*]] = add nuw nsw <4 x i8> [[TMP1]],
 ; CHECK-NEXT: ret <4 x i8> [[T3]]
 ;
   %v0 = lshr <4 x i8> %v, ; clear the top bits
diff --git a/test/Transforms/InstCombine/vec_demanded_elts.ll b/test/Transforms/InstCombine/vec_demanded_elts.ll
index 7313b02034e..a56152cca59 100644
--- a/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -65,7 +65,7 @@ declare i32 @fgetc(i8*)
 define <4 x float> @dead_shuffle_elt(<4 x float> %x, <2 x float> %y) nounwind {
 ; CHECK-LABEL: @dead_shuffle_elt(
 ; CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <2 x float> [[Y:%.*]], <2 x float> undef, <4 x i32>
-; CHECK-NEXT: [[SHUFFLE9_I:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[SHUFFLE_I]], <4 x i32>
+; CHECK-NEXT: [[SHUFFLE9_I:%.*]] = shufflevector <4 x float> [[SHUFFLE_I]], <4 x float> [[X:%.*]], <4 x i32>
 ; CHECK-NEXT: ret <4 x float> [[SHUFFLE9_I]]
 ;
   %shuffle.i = shufflevector <2 x float> %y, <2 x float> %y, <4 x i32>
diff --git a/test/Transforms/SLPVectorizer/X86/alternate-fp.ll b/test/Transforms/SLPVectorizer/X86/alternate-fp.ll
index 9602314c6d4..de7c59286ac 100644
--- a/test/Transforms/SLPVectorizer/X86/alternate-fp.ll
+++ b/test/Transforms/SLPVectorizer/X86/alternate-fp.ll
@@ -10,7 +10,7 @@ define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) {
 ; CHECK-LABEL: @fadd_fsub_v8f32(
 ; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT: [[TMP2:%.*]] = fsub <8 x float> [[A]], [[B]]
-; CHECK-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> [[TMP1]], <8 x i32>
+; CHECK-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32>
 ; CHECK-NEXT: ret <8 x float> [[R7]]
 ;
   %a0 = extractelement <8 x float> %a, i32 0
@@ -52,7 +52,7 @@ define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) {
 ; SSE-LABEL: @fmul_fdiv_v8f32(
 ; SSE-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
 ; SSE-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
-; SSE-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> [[TMP1]], <8 x i32>
+; SSE-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32>
 ; SSE-NEXT: ret <8 x float> [[R7]]
 ;
 ; SLM-LABEL: @fmul_fdiv_v8f32(
@@ -75,13 +75,13 @@ define <8 x float> @fmul_fdiv_v8f32(<8 x float> %a, <8 x float> %b) {
 ; AVX-LABEL: @fmul_fdiv_v8f32(
 ; AVX-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
 ; AVX-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
-; AVX-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> [[TMP1]], <8 x i32>
+; AVX-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32>
 ; AVX-NEXT: ret <8 x float> [[R7]]
 ;
 ; AVX512-LABEL: @fmul_fdiv_v8f32(
 ; AVX512-NEXT: [[TMP1:%.*]] = fmul <8 x float> [[A:%.*]], [[B:%.*]]
 ; AVX512-NEXT: [[TMP2:%.*]] = fdiv <8 x float> [[A]], [[B]]
-; AVX512-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP2]], <8 x float> [[TMP1]], <8 x i32>
+; AVX512-NEXT: [[R7:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x i32>
 ; AVX512-NEXT: ret <8 x float> [[R7]]
 ;
   %a0 = extractelement <8 x float> %a, i32 0
diff --git a/test/Transforms/SLPVectorizer/X86/alternate-int.ll b/test/Transforms/SLPVectorizer/X86/alternate-int.ll
index a04beed1a45..008d34ef8e2 100644
--- a/test/Transforms/SLPVectorizer/X86/alternate-int.ll
+++ b/test/Transforms/SLPVectorizer/X86/alternate-int.ll
@@ -78,7 +78,7 @@ define <4 x i32> @add_mul_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; SSE-LABEL: @add_mul_v4i32(
 ; SSE-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]]
 ; SSE-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A]], [[B]]
-; SSE-NEXT: [[R3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32>
+; SSE-NEXT: [[R3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32>
 ; SSE-NEXT: ret <4 x i32> [[R3]]
 ;
 ; SLM-LABEL: @add_mul_v4i32(
@@ -103,13 +103,13 @@ define <4 x i32> @add_mul_v4i32(<4 x i32> %a, <4 x i32> %b) {
 ; AVX-LABEL: @add_mul_v4i32(
 ; AVX-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]]
 ; AVX-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A]], [[B]]
-; AVX-NEXT: [[R3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32>
+; AVX-NEXT: [[R3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32>
 ; AVX-NEXT: ret <4 x i32> [[R3]]
 ;
 ; AVX512-LABEL: @add_mul_v4i32(
 ; AVX512-NEXT: [[TMP1:%.*]] = mul <4 x i32> [[A:%.*]], [[B:%.*]]
 ; AVX512-NEXT: [[TMP2:%.*]] = add <4 x i32> [[A]], [[B]]
-; AVX512-NEXT: [[R3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32>
+; AVX512-NEXT: [[R3:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32>
 ; AVX512-NEXT: ret <4 x i32> [[R3]]
 ;
   %a0 = extractelement <4 x i32> %a, i32 0