From 246185486040c9c54db2db6f8efcc54bb9ce1267 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Fri, 9 Aug 2019 12:44:20 +0000
Subject: [PATCH] [X86][SSE] Swap X86ISD::BLENDV inputs with an inverted
 selection mask (PR42825)

As discussed on PR42825, if we are inverting the selection mask we can just
swap the inputs and avoid the inversion.
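
The swap is legal because, for a per-bit (or per-lane all-ones/all-zeros)
selection mask, select(~Cond, X, Y) == select(Cond, Y, X). A minimal
standalone sketch of that identity follows; it is plain C++ for illustration
only, not LLVM code, and the helper name bitSelect is made up:

#include <cassert>
#include <cstdint>

// Bitwise "select": takes the bits of X where the mask bit is 1 and the bits
// of Y where it is 0 -- the same behaviour as a vector select whose mask
// lanes are all-ones or all-zeros.
static uint32_t bitSelect(uint32_t Mask, uint32_t X, uint32_t Y) {
  return (Mask & X) | (~Mask & Y);
}

int main() {
  const uint32_t X = 0x12345678u, Y = 0x9abcdef0u;
  const uint32_t Masks[] = {0x00000000u, 0xffffffffu, 0x0f0f0f0fu,
                            0x80000000u};
  for (uint32_t Mask : Masks) {
    // Inverting the mask is equivalent to swapping the two inputs, so the
    // explicit inversion (the pcmpeqd+pxor in the old codegen) can go away.
    assert(bitSelect(~Mask, X, Y) == bitSelect(Mask, Y, X));
  }
  return 0;
}

The combine below only fires for vector (non-i1) condition types, where IsNOT
can recognise the xor-with-all-ones pattern feeding the mask.
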
Differential Revision: https://reviews.llvm.org/D65522

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@368438 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp           |  6 +++++
 test/CodeGen/X86/combine-sse41-intrinsics.ll | 25 +++++++++-----------
 test/CodeGen/X86/nontemporal-loads.ll        |  9 ++-----
 3 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index fa5f788ff9f..6dd5ec87e7b 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -36615,6 +36615,12 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
   if (SDValue V = narrowVectorSelect(N, DAG, Subtarget))
     return V;
 
+  // select(~Cond, X, Y) -> select(Cond, Y, X)
+  if (CondVT.getScalarType() != MVT::i1)
+    if (SDValue CondNot = IsNOT(Cond, DAG))
+      return DAG.getNode(N->getOpcode(), DL, VT,
+                         DAG.getBitcast(CondVT, CondNot), RHS, LHS);
+
   // Custom action for SELECT MMX
   if (VT == MVT::x86mmx) {
     LHS = DAG.getBitcast(MVT::i64, LHS);
diff --git a/test/CodeGen/X86/combine-sse41-intrinsics.ll b/test/CodeGen/X86/combine-sse41-intrinsics.ll
index 0774f7fe20d..cc4dee33c61 100644
--- a/test/CodeGen/X86/combine-sse41-intrinsics.ll
+++ b/test/CodeGen/X86/combine-sse41-intrinsics.ll
@@ -157,10 +157,9 @@ define <16 x i8> @xor_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
 ; CHECK-LABEL: xor_pblendvb:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    movdqa %xmm0, %xmm3
-; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT:    pxor %xmm2, %xmm0
-; CHECK-NEXT:    pblendvb %xmm0, %xmm1, %xmm3
-; CHECK-NEXT:    movdqa %xmm3, %xmm0
+; CHECK-NEXT:    movaps %xmm2, %xmm0
+; CHECK-NEXT:    pblendvb %xmm0, %xmm3, %xmm1
+; CHECK-NEXT:    movdqa %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %1 = xor <16 x i8> %a2, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
   %2 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %1)
@@ -170,11 +169,10 @@ define <16 x i8> @xor_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
 define <4 x float> @xor_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
 ; CHECK-LABEL: xor_blendvps:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movdqa %xmm0, %xmm3
-; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT:    pxor %xmm2, %xmm0
-; CHECK-NEXT:    blendvps %xmm0, %xmm1, %xmm3
-; CHECK-NEXT:    movaps %xmm3, %xmm0
+; CHECK-NEXT:    movaps %xmm0, %xmm3
+; CHECK-NEXT:    movaps %xmm2, %xmm0
+; CHECK-NEXT:    blendvps %xmm0, %xmm3, %xmm1
+; CHECK-NEXT:    movaps %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %1 = bitcast <4 x float> %a2 to <4 x i32>
   %2 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -186,11 +184,10 @@ define <4 x float> @xor_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %
 define <2 x double> @xor_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
 ; CHECK-LABEL: xor_blendvpd:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movdqa %xmm0, %xmm3
-; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT:    pxor %xmm2, %xmm0
-; CHECK-NEXT:    blendvpd %xmm0, %xmm1, %xmm3
-; CHECK-NEXT:    movapd %xmm3, %xmm0
+; CHECK-NEXT:    movapd %xmm0, %xmm3
+; CHECK-NEXT:    movaps %xmm2, %xmm0
+; CHECK-NEXT:    blendvpd %xmm0, %xmm3, %xmm1
+; CHECK-NEXT:    movapd %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %1 = bitcast <2 x double> %a2 to <4 x i32>
   %2 = xor <4 x i32> %1, <i32 -1, i32 -1, i32 -1, i32 -1>
diff --git a/test/CodeGen/X86/nontemporal-loads.ll b/test/CodeGen/X86/nontemporal-loads.ll
index 8f0118d39bd..8af4a680c77 100644
--- a/test/CodeGen/X86/nontemporal-loads.ll
+++ b/test/CodeGen/X86/nontemporal-loads.ll
@@ -1852,25 +1852,20 @@ define <16 x i32> @test_masked_v16i32(i8 * %addr, <16 x i32> %old, <16 x i32> %m
 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
 ; AVX1-NEXT:    vpxor %xmm5, %xmm5, %xmm5
 ; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vpcmpeqd %xmm6, %xmm6, %xmm6
-; AVX1-NEXT:    vpxor %xmm6, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm3, %xmm3
-; AVX1-NEXT:    vpxor %xmm6, %xmm3, %xmm3
 ; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
 ; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm4
 ; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm4, %xmm4
-; AVX1-NEXT:    vpxor %xmm6, %xmm4, %xmm4
 ; AVX1-NEXT:    vpcmpeqd %xmm5, %xmm2, %xmm2
-; AVX1-NEXT:    vpxor %xmm6, %xmm2, %xmm2
 ; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
 ; AVX1-NEXT:    vmovntdqa 32(%rdi), %xmm4
 ; AVX1-NEXT:    vmovntdqa 48(%rdi), %xmm5
 ; AVX1-NEXT:    vinsertf128 $1, %xmm5, %ymm4, %ymm4
-; AVX1-NEXT:    vblendvps %ymm3, %ymm4, %ymm1, %ymm1
+; AVX1-NEXT:    vblendvps %ymm3, %ymm1, %ymm4, %ymm1
 ; AVX1-NEXT:    vmovntdqa (%rdi), %xmm3
 ; AVX1-NEXT:    vmovntdqa 16(%rdi), %xmm4
 ; AVX1-NEXT:    vinsertf128 $1, %xmm4, %ymm3, %ymm3
-; AVX1-NEXT:    vblendvps %ymm2, %ymm3, %ymm0, %ymm0
+; AVX1-NEXT:    vblendvps %ymm2, %ymm0, %ymm3, %ymm0
 ; AVX1-NEXT:    retq
 ;
 ; AVX2-LABEL: test_masked_v16i32:
-- 
2.40.0