From b13f996da8685e84e09541c776c1aa44bf170dcf Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 9 Sep 2019 01:35:00 +0000
Subject: [PATCH] [X86] Use xorps to create fp128 +0.0 constants.

This matches what we do for f32/f64. GCC also does this for fp128.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@371357 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86ISelLowering.cpp      |  2 +
 lib/Target/X86/X86InstrAVX512.td        |  4 +-
 lib/Target/X86/X86InstrFragmentsSIMD.td |  4 ++
 lib/Target/X86/X86InstrInfo.cpp         | 14 ++++-
 lib/Target/X86/X86InstrSSE.td           |  4 +-
 test/CodeGen/X86/fp128-cast.ll          | 75 +++++++++++++++++++++----
 6 files changed, 87 insertions(+), 16 deletions(-)

diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 2fa368764a5..0a8219214f4 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -628,6 +628,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FABS , MVT::f128, Custom);
     setOperationAction(ISD::FNEG , MVT::f128, Custom);
     setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
+
+    addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
   }

   addRegisterClass(MVT::f80, &X86::RFP80RegClass);
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 46a948bc28d..c17aa29f66f 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -464,7 +464,9 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
 def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
                         [(set FR32X:$dst, fp32imm0)]>;
 def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
-                        [(set FR64X:$dst, fpimm0)]>;
+                        [(set FR64X:$dst, fp64imm0)]>;
+def AVX512_FsFLD0F128 : I<0, Pseudo, (outs VR128X:$dst), (ins), "",
+                          [(set VR128X:$dst, fp128imm0)]>;
 }

 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 096cc27861c..d75b492594b 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -963,6 +963,10 @@ def fp64imm0 : PatLeaf<(f64 fpimm), [{
   return N->isExactlyValue(+0.0);
 }]>;

+def fp128imm0 : PatLeaf<(f128 fpimm), [{
+  return N->isExactlyValue(+0.0);
+}]>;
+
 // EXTRACT_get_vextract128_imm xform function: convert extract_subvector index
 // to VEXTRACTF128/VEXTRACTI128 imm.
 def EXTRACT_get_vextract128_imm : SDNodeXForm<extract_subvector, [{
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ ... @@
       getOperand(0).getReg();
     const TargetRegisterInfo *TRI = &getRegisterInfo();
@@ -5152,6 +5156,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
   case X86::V_SET0:
   case X86::V_SETALLONES:
   case X86::AVX512_128_SET0:
+  case X86::FsFLD0F128:
+  case X86::AVX512_FsFLD0F128:
     Alignment = 16;
     break;
   case X86::MMX_SET0:
@@ -5201,7 +5207,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
   case X86::FsFLD0SD:
   case X86::AVX512_FsFLD0SD:
   case X86::FsFLD0SS:
-  case X86::AVX512_FsFLD0SS: {
+  case X86::AVX512_FsFLD0SS:
+  case X86::FsFLD0F128:
+  case X86::AVX512_FsFLD0F128: {
     // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
     // Create a constant-pool entry and operands to load from it.
@@ -5231,6 +5239,8 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
       Ty = Type::getFloatTy(MF.getFunction().getContext());
     else if (Opc == X86::FsFLD0SD || Opc == X86::AVX512_FsFLD0SD)
       Ty = Type::getDoubleTy(MF.getFunction().getContext());
+    else if (Opc == X86::FsFLD0F128 || Opc == X86::AVX512_FsFLD0F128)
+      Ty = Type::getFP128Ty(MF.getFunction().getContext());
     else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES)
       Ty = VectorType::get(Type::getInt32Ty(MF.getFunction().getContext()),16);
     else if (Opc == X86::AVX2_SETALLONES || Opc == X86::AVX_SET0 ||
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index db1dbf9be5f..5ed4674cef2 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -115,7 +115,9 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
 def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
                  [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1, NoAVX512]>;
 def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
-                 [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2, NoAVX512]>;
+                 [(set FR64:$dst, fp64imm0)]>, Requires<[HasSSE2, NoAVX512]>;
+def FsFLD0F128 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
+                   [(set VR128:$dst, fp128imm0)]>, Requires<[HasSSE1, NoAVX512]>;
 }

 //===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/X86/fp128-cast.ll b/test/CodeGen/X86/fp128-cast.ll
index 321c49d39d9..f215407df44 100644
--- a/test/CodeGen/X86/fp128-cast.ll
+++ b/test/CodeGen/X86/fp128-cast.ll
@@ -680,6 +680,57 @@ entry:
   ret i32 %conv
 }

+
+define i32 @TestConst128Zero(fp128 %v) nounwind {
+; X64-SSE-LABEL: TestConst128Zero:
+; X64-SSE:       # %bb.0: # %entry
+; X64-SSE-NEXT:    pushq %rax
+; X64-SSE-NEXT:    xorps %xmm1, %xmm1
+; X64-SSE-NEXT:    callq __gttf2
+; X64-SSE-NEXT:    xorl %ecx, %ecx
+; X64-SSE-NEXT:    testl %eax, %eax
+; X64-SSE-NEXT:    setg %cl
+; X64-SSE-NEXT:    movl %ecx, %eax
+; X64-SSE-NEXT:    popq %rcx
+; X64-SSE-NEXT:    retq
+;
+; X32-LABEL: TestConst128Zero:
+; X32:       # %bb.0: # %entry
+; X32-NEXT:    subl $12, %esp
+; X32-NEXT:    pushl $0
+; X32-NEXT:    pushl $0
+; X32-NEXT:    pushl $0
+; X32-NEXT:    pushl $0
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    pushl {{[0-9]+}}(%esp)
+; X32-NEXT:    calll __gttf2
+; X32-NEXT:    addl $32, %esp
+; X32-NEXT:    xorl %ecx, %ecx
+; X32-NEXT:    testl %eax, %eax
+; X32-NEXT:    setg %cl
+; X32-NEXT:    movl %ecx, %eax
+; X32-NEXT:    addl $12, %esp
+; X32-NEXT:    retl
+;
+; X64-AVX-LABEL: TestConst128Zero:
+; X64-AVX:       # %bb.0: # %entry
+; X64-AVX-NEXT:    pushq %rax
+; X64-AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; X64-AVX-NEXT:    callq __gttf2
+; X64-AVX-NEXT:    xorl %ecx, %ecx
+; X64-AVX-NEXT:    testl %eax, %eax
+; X64-AVX-NEXT:    setg %cl
+; X64-AVX-NEXT:    movl %ecx, %eax
+; X64-AVX-NEXT:    popq %rcx
+; X64-AVX-NEXT:    retq
+entry:
+  %cmp = fcmp ogt fp128 %v, 0xL00000000000000000000000000000000
+  %conv = zext i1 %cmp to i32
+  ret i32 %conv
+}
+
 ; C code:
 ;   struct TestBits_ieee_ext {
 ;     unsigned v1;
@@ -833,7 +884,7 @@ define fp128 @TestTruncCopysign(fp128 %x, i32 %n) nounwind {
 ; X64-SSE-LABEL: TestTruncCopysign:
 ; X64-SSE:       # %bb.0: # %entry
 ; X64-SSE-NEXT:    cmpl $50001, %edi # imm = 0xC351
-; X64-SSE-NEXT:    jl .LBB17_2
+; X64-SSE-NEXT:    jl .LBB18_2
 ; X64-SSE-NEXT:  # %bb.1: # %if.then
 ; X64-SSE-NEXT:    pushq %rax
 ; X64-SSE-NEXT:    callq __trunctfdf2
 ; X64-SSE-NEXT:    andps {{.*}}(%rip), %xmm0
 ; X64-SSE-NEXT:    movaps {{.*}}(%rip), %xmm1
 ; X64-SSE-NEXT:    orps %xmm1, %xmm0
 ; X64-SSE-NEXT:    callq __extenddftf2
 ; X64-SSE-NEXT:    addq $8, %rsp
-; X64-SSE-NEXT:  .LBB17_2: # %cleanup
+; X64-SSE-NEXT:  .LBB18_2: # %cleanup
 ; X64-SSE-NEXT:    retq
 ;
 ; X32-LABEL: TestTruncCopysign:
@@ -856,7 +907,7 @@ define fp128 @TestTruncCopysign(fp128 %x, i32 %n) nounwind {
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X32-NEXT:    cmpl $50001, {{[0-9]+}}(%esp) # imm = 0xC351
-; X32-NEXT:    jl .LBB17_4
+; X32-NEXT:    jl .LBB18_4
 ; X32-NEXT:  # %bb.1: # %if.then
 ; X32-NEXT:    pushl %eax
 ; X32-NEXT:    pushl %ecx
@@ -868,11 +919,11 @@ define fp128 @TestTruncCopysign(fp128 %x, i32 %n) nounwind {
 ; X32-NEXT:    testb $-128, {{[0-9]+}}(%esp)
 ; X32-NEXT:    flds {{\.LCPI.*}}
 ; X32-NEXT:    flds {{\.LCPI.*}}
-; X32-NEXT:    jne .LBB17_3
+; X32-NEXT:    jne .LBB18_3
 ; X32-NEXT:  # %bb.2: # %if.then
 ; X32-NEXT:    fstp %st(1)
 ; X32-NEXT:    fldz
-; X32-NEXT:  .LBB17_3: # %if.then
+; X32-NEXT:  .LBB18_3: # %if.then
 ; X32-NEXT:    fstp %st(0)
 ; X32-NEXT:    subl $16, %esp
 ; X32-NEXT:    leal {{[0-9]+}}(%esp), %eax
@@ -884,7 +935,7 @@ define fp128 @TestTruncCopysign(fp128 %x, i32 %n) nounwind {
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT:  .LBB17_4: # %cleanup
+; X32-NEXT:  .LBB18_4: # %cleanup
 ; X32-NEXT:    movl %edx, (%esi)
 ; X32-NEXT:    movl %edi, 4(%esi)
 ; X32-NEXT:    movl %ecx, 8(%esi)
@@ -898,7 +949,7 @@ define fp128 @TestTruncCopysign(fp128 %x, i32 %n) nounwind {
 ; X64-AVX-LABEL: TestTruncCopysign:
 ; X64-AVX:       # %bb.0: # %entry
 ; X64-AVX-NEXT:    cmpl $50001, %edi # imm = 0xC351
-; X64-AVX-NEXT:    jl .LBB17_2
+; X64-AVX-NEXT:    jl .LBB18_2
 ; X64-AVX-NEXT:  # %bb.1: # %if.then
 ; X64-AVX-NEXT:    pushq %rax
 ; X64-AVX-NEXT:    callq __trunctfdf2
 ; X64-AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovaps {{.*}}(%rip), %xmm1
 ; X64-AVX-NEXT:    vorps %xmm0, %xmm1, %xmm0
 ; X64-AVX-NEXT:    callq __extenddftf2
 ; X64-AVX-NEXT:    addq $8, %rsp
-; X64-AVX-NEXT:  .LBB17_2: # %cleanup
+; X64-AVX-NEXT:  .LBB18_2: # %cleanup
 ; X64-AVX-NEXT:    retq
 entry:
   %cmp = icmp sgt i32 %n, 50000
@@ -928,7 +979,7 @@ cleanup: ; preds = %entry, %if.then
 define i1 @PR34866(i128 %x) nounwind {
 ; X64-SSE-LABEL: PR34866:
 ; X64-SSE:       # %bb.0:
-; X64-SSE-NEXT:    movaps {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT:    xorps %xmm0, %xmm0
 ; X64-SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
 ; X64-SSE-NEXT:    xorq -{{[0-9]+}}(%rsp), %rsi
 ; X64-SSE-NEXT:    xorq -{{[0-9]+}}(%rsp), %rdi
@@ -948,7 +999,7 @@ define i1 @PR34866(i128 %x) nounwind {
 ;
 ; X64-AVX-LABEL: PR34866:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vmovaps {{.*}}(%rip), %xmm0
+; X64-AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
 ; X64-AVX-NEXT:    xorq -{{[0-9]+}}(%rsp), %rsi
 ; X64-AVX-NEXT:    xorq -{{[0-9]+}}(%rsp), %rdi
@@ -963,7 +1014,7 @@ define i1 @PR34866_commute(i128 %x) nounwind {
 ; X64-SSE-LABEL: PR34866_commute:
 ; X64-SSE:       # %bb.0:
-; X64-SSE-NEXT:    movaps {{.*}}(%rip), %xmm0
+; X64-SSE-NEXT:    xorps %xmm0, %xmm0
 ; X64-SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
 ; X64-SSE-NEXT:    xorq -{{[0-9]+}}(%rsp), %rsi
 ; X64-SSE-NEXT:    xorq -{{[0-9]+}}(%rsp), %rdi
@@ -983,7 +1034,7 @@ define i1 @PR34866_commute(i128 %x) nounwind {
 ;
 ; X64-AVX-LABEL: PR34866_commute:
 ; X64-AVX:       # %bb.0:
-; X64-AVX-NEXT:    vmovaps {{.*}}(%rip), %xmm0
+; X64-AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
 ; X64-AVX-NEXT:    xorq -{{[0-9]+}}(%rsp), %rsi
 ; X64-AVX-NEXT:    xorq -{{[0-9]+}}(%rsp), %rdi
-- 
2.40.0
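
For illustration, a minimal before/after sketch of the change (the function
name is hypothetical; the assembly mirrors the PR34866 checks above,
assuming llc for x86-64 with SSE):

  ; fp128 +0.0 is the all-zero-bits constant
  define fp128 @fp128_zero() nounwind {
    ret fp128 0xL00000000000000000000000000000000
  }

Before this patch the constant was materialized with a constant-pool load:

  movaps .LCPI0_0(%rip), %xmm0    # 16-byte load of an all-zero constant

With fp128 +0.0 registered through addLegalFPImmediate and matched by the
new FsFLD0F128 pseudo, it is created with the same zeroing idiom already
used for f32/f64:

  xorps %xmm0, %xmm0              # xmm0 = +0.0, no memory access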