From ff7ef15ee60938a660cba09dec9a970f9ebe016c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 25 Nov 2017 07:20:22 +0000 Subject: [PATCH] [X86] Support folding to andnps with SSE1 only. With SSE1 only, we emit FAND and FXOR nodes for v4f32. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@318968 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 5 ++++- test/CodeGen/X86/sse-intrinsics-fast-isel.ll | 6 ++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 984ad373788..4db9fe8fa2b 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -35033,10 +35033,13 @@ static SDValue combineFAndFNotToFAndn(SDNode *N, SelectionDAG &DAG, // Vector types are handled in combineANDXORWithAllOnesIntoANDNP(). if (!((VT == MVT::f32 && Subtarget.hasSSE1()) || - (VT == MVT::f64 && Subtarget.hasSSE2()))) + (VT == MVT::f64 && Subtarget.hasSSE2()) || + (VT == MVT::v4f32 && Subtarget.hasSSE1() && !Subtarget.hasSSE2()))) return SDValue(); auto isAllOnesConstantFP = [](SDValue V) { + if (V.getSimpleValueType().isVector()) + return ISD::isBuildVectorAllOnes(V.getNode()); auto *C = dyn_cast(V); return C && C->getConstantFPValue()->isAllOnesValue(); }; diff --git a/test/CodeGen/X86/sse-intrinsics-fast-isel.ll b/test/CodeGen/X86/sse-intrinsics-fast-isel.ll index 79696e65b39..9f738aa9a0e 100644 --- a/test/CodeGen/X86/sse-intrinsics-fast-isel.ll +++ b/test/CodeGen/X86/sse-intrinsics-fast-isel.ll @@ -55,14 +55,12 @@ define <4 x float> @test_mm_and_ps(<4 x float> %a0, <4 x float> %a1) nounwind { define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind { ; X32-LABEL: test_mm_andnot_ps: ; X32: # BB#0: -; X32-NEXT: xorps {{\.LCPI.*}}, %xmm0 -; X32-NEXT: andps %xmm1, %xmm0 +; X32-NEXT: andnps %xmm1, %xmm0 ; X32-NEXT: retl ; ; X64-LABEL: test_mm_andnot_ps: ; X64: # BB#0: -; X64-NEXT: xorps {{.*}}(%rip), %xmm0 -; X64-NEXT: andps %xmm1, %xmm0 +; X64-NEXT: andnps %xmm1, %xmm0 ; X64-NEXT: retq %arg0 = bitcast <4 x float> %a0 to <4 x i32> %arg1 = bitcast <4 x float> %a1 to <4 x i32> -- 2.50.1