From 27b613382c6577b8282a11db87c2d99b49cf238e Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Wed, 19 Apr 2017 21:23:09 +0000
Subject: [PATCH] [DAG] add splat vector support for 'xor' in SimplifyDemandedBits

This allows forming more 'not' ops, so we get improvements for ISAs that have and-not.

Follow-up to: https://reviews.llvm.org/rL300725

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@300763 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp    |  3 +-
 lib/CodeGen/SelectionDAG/TargetLowering.cpp |  2 +-
 test/CodeGen/ARM/vbits.ll                   |  6 ++--
 test/CodeGen/PowerPC/andc.ll                |  6 ++--
 test/CodeGen/X86/avx-logic.ll               | 14 +++-----
 test/CodeGen/X86/avx512-mask-op.ll          | 15 ++++----
 test/CodeGen/X86/i64-to-float.ll            | 40 ++++++++++-----------
 7 files changed, 38 insertions(+), 48 deletions(-)

diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c34a266a44f..9f50bfe7bfb 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5058,8 +5058,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
     return Tmp;
 
   // Simplify the expression using non-local knowledge.
-  if (!VT.isVector() &&
-      SimplifyDemandedBits(SDValue(N, 0)))
+  if (SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
 
   return SDValue();
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index c9b438edabf..91230b93423 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -715,7 +715,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     // If the RHS is a constant, see if we can simplify it.
     // for XOR, we prefer to force bits to 1 if they will make a -1.
     // If we can't force bits, try to shrink the constant.
-    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+    if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
       APInt Expanded = C->getAPIntValue() | (~NewMask);
       // If we can expand it to have all bits set, do it.
       if (Expanded.isAllOnesValue()) {
diff --git a/test/CodeGen/ARM/vbits.ll b/test/CodeGen/ARM/vbits.ll
index 040904b7e9c..0a7f7698fa8 100644
--- a/test/CodeGen/ARM/vbits.ll
+++ b/test/CodeGen/ARM/vbits.ll
@@ -850,11 +850,9 @@ define <4 x i32> @hidden_not_v4i32(<4 x i32> %x) nounwind {
 ; CHECK-LABEL: hidden_not_v4i32:
 ; CHECK: @ BB#0:
 ; CHECK-NEXT: vmov d19, r2, r3
-; CHECK-NEXT: vmov.i32 q8, #0xf
+; CHECK-NEXT: vmov.i32 q8, #0x6
 ; CHECK-NEXT: vmov d18, r0, r1
-; CHECK-NEXT: vmov.i32 q10, #0x6
-; CHECK-NEXT: veor q8, q9, q8
-; CHECK-NEXT: vand q8, q8, q10
+; CHECK-NEXT: vbic q8, q8, q9
 ; CHECK-NEXT: vmov r0, r1, d16
 ; CHECK-NEXT: vmov r2, r3, d17
 ; CHECK-NEXT: bx lr
diff --git a/test/CodeGen/PowerPC/andc.ll b/test/CodeGen/PowerPC/andc.ll
index f9b1e4876fd..df47bfc1e38 100644
--- a/test/CodeGen/PowerPC/andc.ll
+++ b/test/CodeGen/PowerPC/andc.ll
@@ -43,10 +43,8 @@ define i1 @foo(i32 %i) {
 define <4 x i32> @hidden_not_v4i32(<4 x i32> %x) {
 ; CHECK-LABEL: hidden_not_v4i32:
 ; CHECK: # BB#0:
-; CHECK-NEXT: vspltisw 3, 15
-; CHECK-NEXT: vspltisw 4, 6
-; CHECK-NEXT: xxlxor 0, 34, 35
-; CHECK-NEXT: xxland 34, 0, 36
+; CHECK-NEXT: vspltisw 3, 6
+; CHECK-NEXT: xxlandc 34, 35, 34
 ; CHECK-NEXT: blr
   %xor = xor <4 x i32> %x, <i32 15, i32 15, i32 15, i32 15>
   %and = and <4 x i32> %xor, <i32 6, i32 6, i32 6, i32 6>
diff --git a/test/CodeGen/X86/avx-logic.ll b/test/CodeGen/X86/avx-logic.ll
index 95a61ec8bc3..89abbabee27 100644
--- a/test/CodeGen/X86/avx-logic.ll
+++ b/test/CodeGen/X86/avx-logic.ll
@@ -274,16 +274,13 @@ entry:
 define <4 x i32> @and_xor_splat1_v4i32(<4 x i32> %x) nounwind {
 ; AVX-LABEL: and_xor_splat1_v4i32:
 ; AVX: # BB#0:
-; AVX-NEXT: vmovaps {{.*#+}} xmm1 = [1,1,1,1]
-; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vandnps {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: retq
 ;
 ; AVX512-LABEL: and_xor_splat1_v4i32:
 ; AVX512: # BB#0:
 ; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
-; AVX512-NEXT: vxorps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vandnps %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: retq
   %xor = xor <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
   %and = and <4 x i32> %xor, <i32 1, i32 1, i32 1, i32 1>
@@ -293,16 +290,13 @@ define <4 x i32> @and_xor_splat1_v4i32(<4 x i32> %x) nounwind {
 define <4 x i64> @and_xor_splat1_v4i64(<4 x i64> %x) nounwind {
 ; AVX-LABEL: and_xor_splat1_v4i64:
 ; AVX: # BB#0:
-; AVX-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,1,1]
-; AVX-NEXT: vxorps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vandnps {{.*}}(%rip), %ymm0, %ymm0
 ; AVX-NEXT: retq
 ;
 ; AVX512-LABEL: and_xor_splat1_v4i64:
 ; AVX512: # BB#0:
 ; AVX512-NEXT: vbroadcastsd {{.*}}(%rip), %ymm1
-; AVX512-NEXT: vxorps %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vandnps %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT: retq
   %xor = xor <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
   %and = and <4 x i64> %xor, <i64 1, i64 1, i64 1, i64 1>
diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll
index aec1339d653..7103efe050a 100644
--- a/test/CodeGen/X86/avx512-mask-op.ll
+++ b/test/CodeGen/X86/avx512-mask-op.ll
@@ -1430,7 +1430,8 @@ define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
 define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
 ; KNL-LABEL: store_v2i1:
 ; KNL: ## BB#0:
-; KNL-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; KNL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
 ; KNL-NEXT: kmovw %k0, %eax
@@ -1447,7 +1448,8 @@ define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
 ;
 ; AVX512BW-LABEL: store_v2i1:
 ; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512BW-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; AVX512BW-NEXT: vpsllq $63, %zmm0, %zmm0
 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT: kmovd %k0, %eax
@@ -1457,7 +1459,8 @@ define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
 ;
 ; AVX512DQ-LABEL: store_v2i1:
 ; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512DQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512DQ-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
 ; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
@@ -1471,7 +1474,7 @@ define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
 define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
 ; KNL-LABEL: store_v4i1:
 ; KNL: ## BB#0:
-; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; KNL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
 ; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; KNL-NEXT: vpslld $31, %ymm0, %ymm0
 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -1489,7 +1492,7 @@ define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
 ;
 ; AVX512BW-LABEL: store_v4i1:
 ; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; AVX512BW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0
 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -1500,7 +1503,7 @@ define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
 ;
 ; AVX512DQ-LABEL: store_v4i1:
 ; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; AVX512DQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX512DQ-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0
 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
diff --git a/test/CodeGen/X86/i64-to-float.ll b/test/CodeGen/X86/i64-to-float.ll
index da92bdb55d7..9626d64847f 100644
--- a/test/CodeGen/X86/i64-to-float.ll
+++ b/test/CodeGen/X86/i64-to-float.ll
@@ -224,35 +224,33 @@ define <2 x double> @clamp_sitofp_2i64_2f64(<2 x i64> %a) nounwind {
 ; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,0,2147483648,0]
 ; X64-SSE-NEXT: movdqa %xmm0, %xmm2
 ; X64-SSE-NEXT: pxor %xmm1, %xmm2
-; X64-SSE-NEXT: movdqa {{.*#+}} xmm3 = [18446744073709551361,18446744073709551361]
-; X64-SSE-NEXT: movdqa %xmm1, %xmm4
-; X64-SSE-NEXT: pxor %xmm3, %xmm4
-; X64-SSE-NEXT: movdqa %xmm4, %xmm5
-; X64-SSE-NEXT: pcmpgtd %xmm2, %xmm5
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
-; X64-SSE-NEXT: pcmpeqd %xmm2, %xmm4
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
-; X64-SSE-NEXT: pand %xmm6, %xmm2
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
-; X64-SSE-NEXT: por %xmm2, %xmm4
-; X64-SSE-NEXT: movdqa %xmm4, %xmm2
+; X64-SSE-NEXT: movdqa {{.*#+}} xmm3 = [18446744071562067713,18446744071562067713]
+; X64-SSE-NEXT: movdqa %xmm3, %xmm4
+; X64-SSE-NEXT: pcmpgtd %xmm2, %xmm4
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; X64-SSE-NEXT: pcmpeqd %xmm3, %xmm2
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; X64-SSE-NEXT: pand %xmm5, %xmm2
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; X64-SSE-NEXT: por %xmm2, %xmm3
+; X64-SSE-NEXT: movdqa %xmm3, %xmm2
 ; X64-SSE-NEXT: pandn %xmm0, %xmm2
-; X64-SSE-NEXT: pand %xmm3, %xmm4
-; X64-SSE-NEXT: por %xmm2, %xmm4
-; X64-SSE-NEXT: movdqa %xmm4, %xmm0
+; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm3
+; X64-SSE-NEXT: por %xmm2, %xmm3
+; X64-SSE-NEXT: movdqa %xmm3, %xmm0
 ; X64-SSE-NEXT: pxor %xmm1, %xmm0
 ; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255]
-; X64-SSE-NEXT: pxor %xmm2, %xmm1
-; X64-SSE-NEXT: movdqa %xmm0, %xmm3
-; X64-SSE-NEXT: pcmpgtd %xmm1, %xmm3
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
+; X64-SSE-NEXT: por %xmm2, %xmm1
+; X64-SSE-NEXT: movdqa %xmm0, %xmm4
+; X64-SSE-NEXT: pcmpgtd %xmm1, %xmm4
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
 ; X64-SSE-NEXT: pcmpeqd %xmm0, %xmm1
 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; X64-SSE-NEXT: pand %xmm5, %xmm0
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
 ; X64-SSE-NEXT: por %xmm0, %xmm1
 ; X64-SSE-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE-NEXT: pandn %xmm4, %xmm0
+; X64-SSE-NEXT: pandn %xmm3, %xmm0
 ; X64-SSE-NEXT: pand %xmm2, %xmm1
 ; X64-SSE-NEXT: por %xmm0, %xmm1
 ; X64-SSE-NEXT: movd %xmm1, %rax
-- 
2.50.1
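Reviewer note (not part of the original patch): the IR shape these regression tests exercise is an 'xor' with a splat constant feeding an 'and', as in the hidden_not_v4i32 test from test/CodeGen/PowerPC/andc.ll sketched below; the %xor/%and lines are taken verbatim from the test's diff context, while the define/ret wrapper is implied by the test and shown here only for completeness. Because the 'and' demands only the bits set in 6, and the splat xor constant 15 already covers those bits, SimplifyDemandedBits can now widen the vector xor constant to all-ones (per the "force bits to 1 if they will make a -1" comment in the touched code), exposing a 'not' that and-not capable targets select as a single instruction (vbic on ARM, xxlandc on PowerPC, vandnps on x86).

  define <4 x i32> @hidden_not_v4i32(<4 x i32> %x) {
    ; xor with splat 15, but the 'and' below only demands bits 1-2 (mask 6)
    %xor = xor <4 x i32> %x, <i32 15, i32 15, i32 15, i32 15>
    %and = and <4 x i32> %xor, <i32 6, i32 6, i32 6, i32 6>
    ret <4 x i32> %and
  }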