From: Sanjay Patel
Date: Wed, 19 Apr 2017 22:00:00 +0000 (+0000)
Subject: [DAG] add splat vector support for 'or' in SimplifyDemandedBits
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=2aa6ae886a7d9f9652e9a657d0dd6d06adb0f123;p=llvm

[DAG] add splat vector support for 'or' in SimplifyDemandedBits

I've changed one of the tests to not fold away, but we didn't and still don't
do the transform that the comment claims we do (and I don't know why we'd want
to do that).

Follow-up to:
https://reviews.llvm.org/rL300725
https://reviews.llvm.org/rL300763

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@300772 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 9f50bfe7bfb..66afc905ca0 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4225,8 +4225,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
     return Load;
 
   // Simplify the operands using demanded-bits information.
-  if (!VT.isVector() &&
-      SimplifyDemandedBits(SDValue(N, 0)))
+  if (SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
 
   return SDValue();
diff --git a/test/CodeGen/X86/combine-or.ll b/test/CodeGen/X86/combine-or.ll
index e4cf296432b..d7f52d24798 100644
--- a/test/CodeGen/X86/combine-or.ll
+++ b/test/CodeGen/X86/combine-or.ll
@@ -430,6 +430,7 @@ define <4 x i32> @test2f(<4 x i32> %a, <4 x i32> %b) {
   ret <4 x i32> %or
 }
 
+; TODO: Why would we do this?
 ; (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
 
 define <2 x i64> @or_and_v2i64(<2 x i64> %a0) {
@@ -438,16 +439,17 @@ define <2 x i64> @or_and_v2i64(<2 x i64> %a0) {
 ; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: orps {{.*}}(%rip), %xmm0
 ; CHECK-NEXT: retq
-  %1 = and <2 x i64> %a0,
+  %1 = and <2 x i64> %a0,
   %2 = or <2 x i64> %1,
   ret <2 x i64> %2
 }
 
+; If all masked bits are going to be set, that's a constant fold.
+
 define <4 x i32> @or_and_v4i32(<4 x i32> %a0) {
 ; CHECK-LABEL: or_and_v4i32:
 ; CHECK: # BB#0:
-; CHECK-NEXT: andps {{.*}}(%rip), %xmm0
-; CHECK-NEXT: orps {{.*}}(%rip), %xmm0
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [3,3,3,3]
 ; CHECK-NEXT: retq
   %1 = and <4 x i32> %a0,
   %2 = or <4 x i32> %1,
@@ -459,9 +461,7 @@ define <4 x i32> @or_and_v4i32(<4 x i32> %a0) {
 define <2 x i64> @or_zext_v2i32(<2 x i32> %a0) {
 ; CHECK-LABEL: or_zext_v2i32:
 ; CHECK: # BB#0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
-; CHECK-NEXT: por {{.*}}(%rip), %xmm0
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [4294967295,4294967295]
 ; CHECK-NEXT: retq
   %1 = zext <2 x i32> %a0 to <2 x i64>
   %2 = or <2 x i64> %1,
@@ -471,9 +471,7 @@ define <2 x i64> @or_zext_v2i32(<2 x i32> %a0) {
 define <4 x i32> @or_zext_v4i16(<4 x i16> %a0) {
 ; CHECK-LABEL: or_zext_v4i16:
 ; CHECK: # BB#0:
-; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
-; CHECK-NEXT: por {{.*}}(%rip), %xmm0
+; CHECK-NEXT: movaps {{.*#+}} xmm0 = [65535,65535,65535,65535]
 ; CHECK-NEXT: retq
   %1 = zext <4 x i16> %a0 to <4 x i32>
   %2 = or <4 x i32> %1,
diff --git a/test/CodeGen/X86/i64-to-float.ll b/test/CodeGen/X86/i64-to-float.ll
index 9626d64847f..3da1a360e29 100644
--- a/test/CodeGen/X86/i64-to-float.ll
+++ b/test/CodeGen/X86/i64-to-float.ll
@@ -237,21 +237,19 @@ define <2 x double> @clamp_sitofp_2i64_2f64(<2 x i64> %a) nounwind {
 ; X64-SSE-NEXT: pandn %xmm0, %xmm2
 ; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm3
 ; X64-SSE-NEXT: por %xmm2, %xmm3
-; X64-SSE-NEXT: movdqa %xmm3, %xmm0
-; X64-SSE-NEXT: pxor %xmm1, %xmm0
-; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255]
-; X64-SSE-NEXT: por %xmm2, %xmm1
-; X64-SSE-NEXT: movdqa %xmm0, %xmm4
-; X64-SSE-NEXT: pcmpgtd %xmm1, %xmm4
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; X64-SSE-NEXT: pxor %xmm3, %xmm1
+; X64-SSE-NEXT: movdqa {{.*#+}} xmm0 = [2147483903,2147483903]
+; X64-SSE-NEXT: movdqa %xmm1, %xmm2
+; X64-SSE-NEXT: pcmpgtd %xmm0, %xmm2
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,0,2,2]
 ; X64-SSE-NEXT: pcmpeqd %xmm0, %xmm1
 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
-; X64-SSE-NEXT: pand %xmm5, %xmm0
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
+; X64-SSE-NEXT: pand %xmm4, %xmm0
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
 ; X64-SSE-NEXT: por %xmm0, %xmm1
 ; X64-SSE-NEXT: movdqa %xmm1, %xmm0
 ; X64-SSE-NEXT: pandn %xmm3, %xmm0
-; X64-SSE-NEXT: pand %xmm2, %xmm1
+; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm1
 ; X64-SSE-NEXT: por %xmm0, %xmm1
 ; X64-SSE-NEXT: movd %xmm1, %rax
 ; X64-SSE-NEXT: xorps %xmm0, %xmm0
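
As a minimal sketch of the fold the updated CHECK lines expect (the function
name and constants below are illustrative only and are not taken from the
tests in this patch): once the !VT.isVector() restriction is removed,
SimplifyDemandedBits can see that every bit the mask keeps is already set by
the 'or' constant, so the whole splat-vector expression becomes a constant.

; Illustrative example, not part of this patch: (X & 1) | 3 on a <4 x i32> splat.
; The 'and' can only leave bit 0 set, and the 'or' sets bits 0 and 1, so the
; result is always 3 and should lower to a single constant such as [3,3,3,3].
define <4 x i32> @or_and_splat_example(<4 x i32> %x) {
  %masked = and <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
  %result = or <4 x i32> %masked, <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %result
}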