From 2aa6ae886a7d9f9652e9a657d0dd6d06adb0f123 Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Wed, 19 Apr 2017 22:00:00 +0000
Subject: [PATCH] [DAG] add splat vector support for 'or' in SimplifyDemandedBits

I've changed one of the tests to not fold away, but we didn't and still
don't do the transform that the comment claims we do (and I don't know
why we'd want to do that).

Follow-up to:
https://reviews.llvm.org/rL300725
https://reviews.llvm.org/rL300763

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@300772 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  3 +--
 test/CodeGen/X86/combine-or.ll           | 16 +++++++---------
 test/CodeGen/X86/i64-to-float.ll         | 18 ++++++++----------
 3 files changed, 16 insertions(+), 21 deletions(-)

diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9f50bfe7bfb..66afc905ca0 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4225,8 +4225,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
     return Load;
 
   // Simplify the operands using demanded-bits information.
-  if (!VT.isVector() &&
-      SimplifyDemandedBits(SDValue(N, 0)))
+  if (SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
 
   return SDValue();
diff --git a/test/CodeGen/X86/combine-or.ll b/test/CodeGen/X86/combine-or.ll
index e4cf296432b..d7f52d24798 100644
--- a/test/CodeGen/X86/combine-or.ll
+++ b/test/CodeGen/X86/combine-or.ll
@@ -430,6 +430,7 @@ define <4 x i32> @test2f(<4 x i32> %a, <4 x i32> %b) {
   ret <4 x i32> %or
 }
 
+; TODO: Why would we do this?
 ; (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
 
 define <2 x i64> @or_and_v2i64(<2 x i64> %a0) {
@@ -438,16 +439,17 @@ define <2 x i64> @or_and_v2i64(<2 x i64> %a0) {
 ; CHECK-NEXT:    andps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    orps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
-  %1 = and <2 x i64> %a0,
+  %1 = and <2 x i64> %a0,
   %2 = or <2 x i64> %1,
   ret <2 x i64> %2
 }
 
+; If all masked bits are going to be set, that's a constant fold.
+
 define <4 x i32> @or_and_v4i32(<4 x i32> %a0) {
 ; CHECK-LABEL: or_and_v4i32:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    andps {{.*}}(%rip), %xmm0
-; CHECK-NEXT:    orps {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [3,3,3,3]
 ; CHECK-NEXT:    retq
   %1 = and <4 x i32> %a0,
   %2 = or <4 x i32> %1,
@@ -459,9 +461,7 @@ define <4 x i32> @or_and_v4i32(<4 x i32> %a0) {
 define <2 x i64> @or_zext_v2i32(<2 x i32> %a0) {
 ; CHECK-LABEL: or_zext_v2i32:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    pxor %xmm1, %xmm1
-; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; CHECK-NEXT:    por {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [4294967295,4294967295]
 ; CHECK-NEXT:    retq
   %1 = zext <2 x i32> %a0 to <2 x i64>
   %2 = or <2 x i64> %1,
@@ -471,9 +471,7 @@ define <2 x i64> @or_zext_v2i32(<2 x i32> %a0) {
 define <4 x i32> @or_zext_v4i16(<4 x i16> %a0) {
 ; CHECK-LABEL: or_zext_v4i16:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    pxor %xmm1, %xmm1
-; CHECK-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
-; CHECK-NEXT:    por {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    movaps {{.*#+}} xmm0 = [65535,65535,65535,65535]
 ; CHECK-NEXT:    retq
   %1 = zext <4 x i16> %a0 to <4 x i32>
   %2 = or <4 x i32> %1,
diff --git a/test/CodeGen/X86/i64-to-float.ll b/test/CodeGen/X86/i64-to-float.ll
index 9626d64847f..3da1a360e29 100644
--- a/test/CodeGen/X86/i64-to-float.ll
+++ b/test/CodeGen/X86/i64-to-float.ll
@@ -237,21 +237,19 @@ define <2 x double> @clamp_sitofp_2i64_2f64(<2 x i64> %a) nounwind {
 ; X64-SSE-NEXT:    pandn %xmm0, %xmm2
 ; X64-SSE-NEXT:    pand {{.*}}(%rip), %xmm3
 ; X64-SSE-NEXT:    por %xmm2, %xmm3
-; X64-SSE-NEXT:    movdqa %xmm3, %xmm0
-; X64-SSE-NEXT:    pxor %xmm1, %xmm0
-; X64-SSE-NEXT:    movdqa {{.*#+}} xmm2 = [255,255]
-; X64-SSE-NEXT:    por %xmm2, %xmm1
-; X64-SSE-NEXT:    movdqa %xmm0, %xmm4
-; X64-SSE-NEXT:    pcmpgtd %xmm1, %xmm4
-; X64-SSE-NEXT:    pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; X64-SSE-NEXT:    pxor %xmm3, %xmm1
+; X64-SSE-NEXT:    movdqa {{.*#+}} xmm0 = [2147483903,2147483903]
+; X64-SSE-NEXT:    movdqa %xmm1, %xmm2
+; X64-SSE-NEXT:    pcmpgtd %xmm0, %xmm2
+; X64-SSE-NEXT:    pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
 ; X64-SSE-NEXT:    pcmpeqd %xmm0, %xmm1
 ; X64-SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
-; X64-SSE-NEXT:    pand %xmm5, %xmm0
-; X64-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
+; X64-SSE-NEXT:    pand %xmm4, %xmm0
+; X64-SSE-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
 ; X64-SSE-NEXT:    por %xmm0, %xmm1
 ; X64-SSE-NEXT:    movdqa %xmm1, %xmm0
 ; X64-SSE-NEXT:    pandn %xmm3, %xmm0
-; X64-SSE-NEXT:    pand %xmm2, %xmm1
+; X64-SSE-NEXT:    pand {{.*}}(%rip), %xmm1
 ; X64-SSE-NEXT:    por %xmm0, %xmm1
 ; X64-SSE-NEXT:    movd %xmm1, %rax
 ; X64-SSE-NEXT:    xorps %xmm0, %xmm0
-- 
2.50.1
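For context, a minimal LLVM IR sketch of the splat-vector case this change enables; the function and value names are invented for illustration and are not part of the patch. It mirrors the or_zext_v4i16 test above: with the !VT.isVector() guard removed, SimplifyDemandedBits can prove every bit of the 'or' result while combining the DAG, so codegen materializes the splat constant directly (the movaps [65535,65535,65535,65535] in the updated CHECK line) instead of emitting pxor/pblendw/por.

    define <4 x i32> @splat_or_demo(<4 x i16> %x) {
      ; the zext guarantees the high 16 bits of each i32 element are zero
      %z = zext <4 x i16> %x to <4 x i32>
      ; or'ing with a splat of 65535 sets the low 16 bits, so every bit of %r
      ; is known and the backend can fold the whole value to a constant splat
      %r = or <4 x i32> %z, <i32 65535, i32 65535, i32 65535, i32 65535>
      ret <4 x i32> %r
    }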