From 27b613382c6577b8282a11db87c2d99b49cf238e Mon Sep 17 00:00:00 2001
From: Sanjay Patel
Date: Wed, 19 Apr 2017 21:23:09 +0000
Subject: [PATCH] [DAG] add splat vector support for 'xor' in SimplifyDemandedBits

This allows forming more 'not' ops, so we get improvements for ISAs that have and-not.

Follow-up to: https://reviews.llvm.org/rL300725

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@300763 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp    |  3 +-
 lib/CodeGen/SelectionDAG/TargetLowering.cpp |  2 +-
 test/CodeGen/ARM/vbits.ll                   |  6 ++--
 test/CodeGen/PowerPC/andc.ll                |  6 ++--
 test/CodeGen/X86/avx-logic.ll               | 14 +++-----
 test/CodeGen/X86/avx512-mask-op.ll          | 15 ++++----
 test/CodeGen/X86/i64-to-float.ll            | 40 ++++++++++-----------
 7 files changed, 38 insertions(+), 48 deletions(-)

diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c34a266a44f..9f50bfe7bfb 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -5058,8 +5058,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
     return Tmp;
 
   // Simplify the expression using non-local knowledge.
-  if (!VT.isVector() &&
-      SimplifyDemandedBits(SDValue(N, 0)))
+  if (SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
 
   return SDValue();
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index c9b438edabf..91230b93423 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -715,7 +715,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
     // If the RHS is a constant, see if we can simplify it.
     // for XOR, we prefer to force bits to 1 if they will make a -1.
     // If we can't force bits, try to shrink the constant.
-    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+    if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
       APInt Expanded = C->getAPIntValue() | (~NewMask);
       // If we can expand it to have all bits set, do it.
       if (Expanded.isAllOnesValue()) {
diff --git a/test/CodeGen/ARM/vbits.ll b/test/CodeGen/ARM/vbits.ll
index 040904b7e9c..0a7f7698fa8 100644
--- a/test/CodeGen/ARM/vbits.ll
+++ b/test/CodeGen/ARM/vbits.ll
@@ -850,11 +850,9 @@ define <4 x i32> @hidden_not_v4i32(<4 x i32> %x) nounwind {
 ; CHECK-LABEL: hidden_not_v4i32:
 ; CHECK: @ BB#0:
 ; CHECK-NEXT: vmov d19, r2, r3
-; CHECK-NEXT: vmov.i32 q8, #0xf
+; CHECK-NEXT: vmov.i32 q8, #0x6
 ; CHECK-NEXT: vmov d18, r0, r1
-; CHECK-NEXT: vmov.i32 q10, #0x6
-; CHECK-NEXT: veor q8, q9, q8
-; CHECK-NEXT: vand q8, q8, q10
+; CHECK-NEXT: vbic q8, q8, q9
 ; CHECK-NEXT: vmov r0, r1, d16
 ; CHECK-NEXT: vmov r2, r3, d17
 ; CHECK-NEXT: bx lr
diff --git a/test/CodeGen/PowerPC/andc.ll b/test/CodeGen/PowerPC/andc.ll
index f9b1e4876fd..df47bfc1e38 100644
--- a/test/CodeGen/PowerPC/andc.ll
+++ b/test/CodeGen/PowerPC/andc.ll
@@ -43,10 +43,8 @@ define i1 @foo(i32 %i) {
 define <4 x i32> @hidden_not_v4i32(<4 x i32> %x) {
 ; CHECK-LABEL: hidden_not_v4i32:
 ; CHECK: # BB#0:
-; CHECK-NEXT: vspltisw 3, 15
-; CHECK-NEXT: vspltisw 4, 6
-; CHECK-NEXT: xxlxor 0, 34, 35
-; CHECK-NEXT: xxland 34, 0, 36
+; CHECK-NEXT: vspltisw 3, 6
+; CHECK-NEXT: xxlandc 34, 35, 34
 ; CHECK-NEXT: blr
   %xor = xor <4 x i32> %x, <i32 15, i32 15, i32 15, i32 15>
   %and = and <4 x i32> %xor, <i32 6, i32 6, i32 6, i32 6>
diff --git a/test/CodeGen/X86/avx-logic.ll b/test/CodeGen/X86/avx-logic.ll
index 95a61ec8bc3..89abbabee27 100644
--- a/test/CodeGen/X86/avx-logic.ll
+++ b/test/CodeGen/X86/avx-logic.ll
@@ -274,16 +274,13 @@ entry:
 define <4 x i32> @and_xor_splat1_v4i32(<4 x i32> %x) nounwind {
 ; AVX-LABEL: and_xor_splat1_v4i32:
 ; AVX: # BB#0:
-; AVX-NEXT: vmovaps {{.*#+}} xmm1 = [1,1,1,1]
-; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vandnps {{.*}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: retq
 ;
 ; AVX512-LABEL: and_xor_splat1_v4i32:
 ; AVX512: # BB#0:
 ; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
-; AVX512-NEXT: vxorps %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vandnps %xmm1, %xmm0, %xmm0
 ; AVX512-NEXT: retq
   %xor = xor <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
   %and = and <4 x i32> %xor, <i32 1, i32 1, i32 1, i32 1>
@@ -293,16 +290,13 @@ define <4 x i32> @and_xor_splat1_v4i32(<4 x i32> %x) nounwind {
 define <4 x i64> @and_xor_splat1_v4i64(<4 x i64> %x) nounwind {
 ; AVX-LABEL: and_xor_splat1_v4i64:
 ; AVX: # BB#0:
-; AVX-NEXT: vmovaps {{.*#+}} ymm1 = [1,1,1,1]
-; AVX-NEXT: vxorps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vandnps {{.*}}(%rip), %ymm0, %ymm0
 ; AVX-NEXT: retq
 ;
 ; AVX512-LABEL: and_xor_splat1_v4i64:
 ; AVX512: # BB#0:
 ; AVX512-NEXT: vbroadcastsd {{.*}}(%rip), %ymm1
-; AVX512-NEXT: vxorps %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vandps %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vandnps %ymm1, %ymm0, %ymm0
 ; AVX512-NEXT: retq
   %xor = xor <4 x i64> %x, <i64 1, i64 1, i64 1, i64 1>
   %and = and <4 x i64> %xor, <i64 1, i64 1, i64 1, i64 1>
diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll
index aec1339d653..7103efe050a 100644
--- a/test/CodeGen/X86/avx512-mask-op.ll
+++ b/test/CodeGen/X86/avx512-mask-op.ll
@@ -1430,7 +1430,8 @@ define void @store_v1i1(<1 x i1> %c , <1 x i1>* %ptr) {
 define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
 ; KNL-LABEL: store_v2i1:
 ; KNL: ## BB#0:
-; KNL-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; KNL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; KNL-NEXT: vpsllq $63, %zmm0, %zmm0
 ; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0
 ; KNL-NEXT: kmovw %k0, %eax
@@ -1447,7 +1448,8 @@ define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
 ;
 ; AVX512BW-LABEL: store_v2i1:
 ; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512BW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512BW-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; AVX512BW-NEXT: vpsllq $63, %zmm0, %zmm0
 ; AVX512BW-NEXT: vptestmq %zmm0, %zmm0, %k0
 ; AVX512BW-NEXT: kmovd %k0, %eax
@@ -1457,7 +1459,8 @@ define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
 ;
 ; AVX512DQ-LABEL: store_v2i1:
 ; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: vpxor {{.*}}(%rip), %xmm0, %xmm0
+; AVX512DQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX512DQ-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; AVX512DQ-NEXT: vpsllq $63, %zmm0, %zmm0
 ; AVX512DQ-NEXT: vptestmq %zmm0, %zmm0, %k0
 ; AVX512DQ-NEXT: kmovb %k0, (%rdi)
@@ -1471,7 +1474,7 @@ define void @store_v2i1(<2 x i1> %c , <2 x i1>* %ptr) {
 define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
 ; KNL-LABEL: store_v4i1:
 ; KNL: ## BB#0:
-; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; KNL-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
 ; KNL-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; KNL-NEXT: vpslld $31, %ymm0, %ymm0
 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -1489,7 +1492,7 @@ define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
 ;
 ; AVX512BW-LABEL: store_v4i1:
 ; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; AVX512BW-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX512BW-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; AVX512BW-NEXT: vpslld $31, %ymm0, %ymm0
 ; AVX512BW-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -1500,7 +1503,7 @@ define void @store_v4i1(<4 x i1> %c , <4 x i1>* %ptr) {
 ;
 ; AVX512DQ-LABEL: store_v4i1:
 ; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; AVX512DQ-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
 ; AVX512DQ-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; AVX512DQ-NEXT: vpslld $31, %ymm0, %ymm0
 ; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
diff --git a/test/CodeGen/X86/i64-to-float.ll b/test/CodeGen/X86/i64-to-float.ll
index da92bdb55d7..9626d64847f 100644
--- a/test/CodeGen/X86/i64-to-float.ll
+++ b/test/CodeGen/X86/i64-to-float.ll
@@ -224,35 +224,33 @@ define <2 x double> @clamp_sitofp_2i64_2f64(<2 x i64> %a) nounwind {
 ; X64-SSE-NEXT: movdqa {{.*#+}} xmm1 = [2147483648,0,2147483648,0]
 ; X64-SSE-NEXT: movdqa %xmm0, %xmm2
 ; X64-SSE-NEXT: pxor %xmm1, %xmm2
-; X64-SSE-NEXT: movdqa {{.*#+}} xmm3 = [18446744073709551361,18446744073709551361]
-; X64-SSE-NEXT: movdqa %xmm1, %xmm4
-; X64-SSE-NEXT: pxor %xmm3, %xmm4
-; X64-SSE-NEXT: movdqa %xmm4, %xmm5
-; X64-SSE-NEXT: pcmpgtd %xmm2, %xmm5
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
-; X64-SSE-NEXT: pcmpeqd %xmm2, %xmm4
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm4[1,1,3,3]
-; X64-SSE-NEXT: pand %xmm6, %xmm2
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
-; X64-SSE-NEXT: por %xmm2, %xmm4
-; X64-SSE-NEXT: movdqa %xmm4, %xmm2
+; X64-SSE-NEXT: movdqa {{.*#+}} xmm3 = [18446744071562067713,18446744071562067713]
+; X64-SSE-NEXT: movdqa %xmm3, %xmm4
+; X64-SSE-NEXT: pcmpgtd %xmm2, %xmm4
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
+; X64-SSE-NEXT: pcmpeqd %xmm3, %xmm2
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
+; X64-SSE-NEXT: pand %xmm5, %xmm2
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm4[1,1,3,3]
+; X64-SSE-NEXT: por %xmm2, %xmm3
+; X64-SSE-NEXT: movdqa %xmm3, %xmm2
 ; X64-SSE-NEXT: pandn %xmm0, %xmm2
-; X64-SSE-NEXT: pand %xmm3, %xmm4
-; X64-SSE-NEXT: por %xmm2, %xmm4
-; X64-SSE-NEXT: movdqa %xmm4, %xmm0
+; X64-SSE-NEXT: pand {{.*}}(%rip), %xmm3
+; X64-SSE-NEXT: por %xmm2, %xmm3
+; X64-SSE-NEXT: movdqa %xmm3, %xmm0
 ; X64-SSE-NEXT: pxor %xmm1, %xmm0
 ; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,255]
-; X64-SSE-NEXT: pxor %xmm2, %xmm1
-; X64-SSE-NEXT: movdqa %xmm0, %xmm3
-; X64-SSE-NEXT: pcmpgtd %xmm1, %xmm3
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm3[0,0,2,2]
+; X64-SSE-NEXT: por %xmm2, %xmm1
+; X64-SSE-NEXT: movdqa %xmm0, %xmm4
+; X64-SSE-NEXT: pcmpgtd %xmm1, %xmm4
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm4[0,0,2,2]
 ; X64-SSE-NEXT: pcmpeqd %xmm0, %xmm1
 ; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
 ; X64-SSE-NEXT: pand %xmm5, %xmm0
-; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm3[1,1,3,3]
+; X64-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm4[1,1,3,3]
 ; X64-SSE-NEXT: por %xmm0, %xmm1
 ; X64-SSE-NEXT: movdqa %xmm1, %xmm0
-; X64-SSE-NEXT: pandn %xmm4, %xmm0
+; X64-SSE-NEXT: pandn %xmm3, %xmm0
 ; X64-SSE-NEXT: pand %xmm2, %xmm1
 ; X64-SSE-NEXT: por %xmm0, %xmm1
 ; X64-SSE-NEXT: movd %xmm1, %rax
-- 
2.50.1
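Reviewer note (not part of the original patch): the IR shape these regression tests exercise is an 'xor' with a splat constant feeding an 'and', as in the hidden_not_v4i32 test from test/CodeGen/PowerPC/andc.ll sketched below; the %xor/%and lines are taken verbatim from the test's diff context, while the define/ret wrapper is implied by the test and shown here only for completeness. Because the 'and' demands only the bits set in 6, and the splat xor constant 15 already covers those bits, SimplifyDemandedBits can now widen the vector xor constant to all-ones (per the "force bits to 1 if they will make a -1" comment in the touched code), exposing a 'not' that and-not capable targets select as a single instruction (vbic on ARM, xxlandc on PowerPC, vandnps on x86).

  define <4 x i32> @hidden_not_v4i32(<4 x i32> %x) {
    ; xor with splat 15, but the 'and' below only demands bits 1-2 (mask 6)
    %xor = xor <4 x i32> %x, <i32 15, i32 15, i32 15, i32 15>
    %and = and <4 x i32> %xor, <i32 6, i32 6, i32 6, i32 6>
    ret <4 x i32> %and
  }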