From: Simon Pilgrim
Date: Fri, 9 Aug 2019 12:44:20 +0000 (+0000)
Subject: [X86][SSE] Swap X86ISD::BLENDV inputs with an inverted selection mask (PR42825)
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=246185486040c9c54db2db6f8efcc54bb9ce1267;p=llvm

[X86][SSE] Swap X86ISD::BLENDV inputs with an inverted selection mask (PR42825)

As discussed on PR42825, if we are inverting the selection mask we can just
swap the inputs and avoid the inversion.

Differential Revision: https://reviews.llvm.org/D65522

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@368438 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index fa5f788ff9f..6dd5ec87e7b 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -36615,6 +36615,12 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
   if (SDValue V = narrowVectorSelect(N, DAG, Subtarget))
     return V;
 
+  // select(~Cond, X, Y) -> select(Cond, Y, X)
+  if (CondVT.getScalarType() != MVT::i1)
+    if (SDValue CondNot = IsNOT(Cond, DAG))
+      return DAG.getNode(N->getOpcode(), DL, VT,
+                         DAG.getBitcast(CondVT, CondNot), RHS, LHS);
+
   // Custom action for SELECT MMX
   if (VT == MVT::x86mmx) {
     LHS = DAG.getBitcast(MVT::i64, LHS);
diff --git a/test/CodeGen/X86/combine-sse41-intrinsics.ll b/test/CodeGen/X86/combine-sse41-intrinsics.ll
index 0774f7fe20d..cc4dee33c61 100644
--- a/test/CodeGen/X86/combine-sse41-intrinsics.ll
+++ b/test/CodeGen/X86/combine-sse41-intrinsics.ll
@@ -157,10 +157,9 @@ define <16 x i8> @xor_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
 ; CHECK-LABEL: xor_pblendvb:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movdqa %xmm0, %xmm3
-; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT:    pxor %xmm2, %xmm0
-; CHECK-NEXT:    pblendvb %xmm0, %xmm1, %xmm3
-; CHECK-NEXT:    movdqa %xmm3, %xmm0
+; CHECK-NEXT:    movaps %xmm2, %xmm0
+; CHECK-NEXT:    pblendvb %xmm0, %xmm3, %xmm1
+; CHECK-NEXT:    movdqa %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %1 = xor <16 x i8> %a2, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
   %2 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %1)
@@ -170,11 +169,10 @@ define <16 x i8> @xor_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
 define <4 x float> @xor_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
 ; CHECK-LABEL: xor_blendvps:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movdqa %xmm0, %xmm3
-; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT:    pxor %xmm2, %xmm0
-; CHECK-NEXT:    blendvps %xmm0, %xmm1, %xmm3
-; CHECK-NEXT:    movaps %xmm3, %xmm0
+; CHECK-NEXT:    movaps %xmm0, %xmm3
+; CHECK-NEXT:    movaps %xmm2, %xmm0
+; CHECK-NEXT:    blendvps %xmm0, %xmm3, %xmm1
+; CHECK-NEXT:    movaps %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %1 = bitcast <4 x float> %a2 to <4 x i32>
   %2 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -186,11 +184,10 @@ define <4 x float> @xor_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %
 define <2 x double> @xor_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
 ; CHECK-LABEL: xor_blendvpd:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movdqa %xmm0, %xmm3
-; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT:    pxor %xmm2, %xmm0
-; CHECK-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
-; CHECK-NEXT:    movapd %xmm3, %xmm0
+; CHECK-NEXT:    movapd %xmm0, %xmm3
+; CHECK-NEXT:    movaps %xmm2, %xmm0
+; CHECK-NEXT:    blendvpd %xmm0, %xmm3, %xmm1
+; CHECK-NEXT:    movapd %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %1 = bitcast <2 x double> %a2 to <4 x i32>
   %2 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
diff --git a/test/CodeGen/X86/nontemporal-loads.ll b/test/CodeGen/X86/nontemporal-loads.ll
index 8f0118d39bd..8af4a680c77 100644
--- a/test/CodeGen/X86/nontemporal-loads.ll
+++ b/test/CodeGen/X86/nontemporal-loads.ll
@@ -1852,25 +1852,20 @@ define <16 x i32> @test_masked_v16i32(i8 * %addr, <16 x i32> %old, <16 x i32> %m
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
 ; AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
 ; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vpcmpeqd %xmm6, %xmm6, %xmm6
-; AVX1-NEXT:    vpxor %xmm6, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vpxor %xmm6, %xmm3, %xmm3
 ; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
 ; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vpxor %xmm6, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm2, %xmm2
-; AVX1-NEXT:    vpxor %xmm6, %xmm2, %xmm2
 ; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
 ; AVX1-NEXT:    vmovntdqa 32(%rdi), %xmm4
 ; AVX1-NEXT:    vmovntdqa 48(%rdi), %xmm5
 ; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm4, %ymm4
-; AVX1-NEXT:    vblendvps %ymm3, %ymm4, %ymm1, %ymm1
+; AVX1-NEXT:    vblendvps %ymm3, %ymm1, %ymm4, %ymm1
 ; AVX1-NEXT:    vmovntdqa (%rdi), %xmm3
 ; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm4
 ; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
-; AVX1-NEXT:    vblendvps %ymm2, %ymm3, %ymm0, %ymm0
+; AVX1-NEXT:    vblendvps %ymm2, %ymm0, %ymm3, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_masked_v16i32:
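The swap is safe because the BLENDV family (pblendvb/blendvps/blendvpd) only consults the sign bit of each mask lane, and inverting the mask flips that bit in every lane; selecting with the inverted mask therefore equals selecting with the original mask and the data operands swapped. A minimal standalone C++ sketch (illustrative only, not part of the patch; the helper name blendv_lane is made up) that checks the identity blendv(~m, x, y) == blendv(m, y, x) exhaustively for one byte lane:

    #include <cassert>
    #include <cstdint>

    // Per-lane model of PBLENDVB: the sign bit of the mask byte picks
    // between the two data operands.
    static uint8_t blendv_lane(uint8_t mask, uint8_t a, uint8_t b) {
      return (mask & 0x80) ? b : a;
    }

    int main() {
      const uint8_t x = 0xAA, y = 0x55;
      for (int m = 0; m < 256; ++m) {
        uint8_t mask = static_cast<uint8_t>(m);
        uint8_t inv = static_cast<uint8_t>(~mask);
        // Selecting with the inverted mask equals selecting with the
        // original mask and swapped inputs, for every possible mask byte.
        assert(blendv_lane(inv, x, y) == blendv_lane(mask, y, x));
      }
      return 0;
    }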