From ff7ef15ee60938a660cba09dec9a970f9ebe016c Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@intel.com>
Date: Sat, 25 Nov 2017 07:20:22 +0000
Subject: [PATCH] [X86] Support folding to andnps with SSE1 only.

With SSE1 only, we emit FAND and FXOR nodes for v4f32.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@318968 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp           | 5 ++++-
 test/CodeGen/X86/sse-intrinsics-fast-isel.ll | 6 ++----
 2 files changed, 6 insertions(+), 5 deletions(-)
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 984ad373788..4db9fe8fa2b 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -35033,10 +35033,13 @@ static SDValue combineFAndFNotToFAndn(SDNode *N, SelectionDAG &DAG,
 
   // Vector types are handled in combineANDXORWithAllOnesIntoANDNP().
   if (!((VT == MVT::f32 && Subtarget.hasSSE1()) ||
-        (VT == MVT::f64 && Subtarget.hasSSE2())))
+        (VT == MVT::f64 && Subtarget.hasSSE2()) ||
+        (VT == MVT::v4f32 && Subtarget.hasSSE1() && !Subtarget.hasSSE2())))
     return SDValue();
 
   auto isAllOnesConstantFP = [](SDValue V) {
+    if (V.getSimpleValueType().isVector())
+      return ISD::isBuildVectorAllOnes(V.getNode());
     auto *C = dyn_cast<ConstantFPSDNode>(V);
     return C && C->getConstantFPValue()->isAllOnesValue();
   };
diff --git a/test/CodeGen/X86/sse-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
index 79696e65b39..9f738aa9a0e 100644
--- a/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
+++ b/test/CodeGen/X86/sse-intrinsics-fast-isel.ll
@@ -55,14 +55,12 @@ define <4 x float> @test_mm_and_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
 define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
 ; X32-LABEL: test_mm_andnot_ps:
 ; X32:       # BB#0:
-; X32-NEXT:    xorps {{\.LCPI.*}}, %xmm0
-; X32-NEXT:    andps %xmm1, %xmm0
+; X32-NEXT:    andnps %xmm1, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm_andnot_ps:
 ; X64:       # BB#0:
-; X64-NEXT:    xorps {{.*}}(%rip), %xmm0
-; X64-NEXT:    andps %xmm1, %xmm0
+; X64-NEXT:    andnps %xmm1, %xmm0
 ; X64-NEXT:    retq
   %arg0 = bitcast <4 x float> %a0 to <4 x i32>
   %arg1 = bitcast <4 x float> %a1 to <4 x i32>
-- 
2.50.1