From: Juergen Ributzka Date: Wed, 11 Jun 2014 23:11:02 +0000 (+0000) Subject: [FastISel][X86] Add support for the sqrt intrinsic. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=02503401b4668c068e6bf032935eac4a97fd7b8d;p=llvm [FastISel][X86] Add support for the sqrt intrinsic. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@210720 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 23919849648..329a96eb9fc 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1781,6 +1781,58 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP)); return true; } + case Intrinsic::sqrt: { + if (!Subtarget->hasSSE1()) + return false; + + Type *RetTy = I.getCalledFunction()->getReturnType(); + + MVT VT; + if (!isTypeLegal(RetTy, VT)) + return false; + + // Unfortunately, we can't use FastEmit_r, because the AVX version of FSQRT + // is not generated by FastISel yet. + // FIXME: Update this code once tablegen can handle it. 
+ static const unsigned SqrtOpc[2][2] = { + {X86::SQRTSSr, X86::VSQRTSSr}, + {X86::SQRTSDr, X86::VSQRTSDr} + }; + bool HasAVX = Subtarget->hasAVX(); + unsigned Opc; + const TargetRegisterClass *RC; + switch (VT.SimpleTy) { + default: return false; + case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break; + case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break; + } + + const Value *SrcVal = I.getArgOperand(0); + unsigned SrcReg = getRegForValue(SrcVal); + + if (SrcReg == 0) + return false; + + unsigned ImplicitDefReg = 0; + if (HasAVX) { + ImplicitDefReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg); + } + + unsigned ResultReg = createResultReg(RC); + MachineInstrBuilder MIB; + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), + ResultReg); + + if (ImplicitDefReg) + MIB.addReg(ImplicitDefReg); + + MIB.addReg(SrcReg); + + UpdateValueMap(&I, ResultReg); + return true; + } case Intrinsic::sadd_with_overflow: case Intrinsic::uadd_with_overflow: case Intrinsic::ssub_with_overflow: diff --git a/test/CodeGen/X86/sqrt.ll b/test/CodeGen/X86/sqrt.ll new file mode 100644 index 00000000000..be7c6e86739 --- /dev/null +++ b/test/CodeGen/X86/sqrt.ll @@ -0,0 +1,26 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=SSE2 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx,+sse2 -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=SSE2 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx2,+avx | FileCheck %s --check-prefix=AVX +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=-avx2,+avx -fast-isel -fast-isel-abort | FileCheck %s --check-prefix=AVX + +define float @test_sqrt_f32(float %a) { +; SSE2-LABEL: test_sqrt_f32 +; SSE2: sqrtss %xmm0, %xmm0 +; AVX-LABEL: test_sqrt_f32 +; AVX: vsqrtss %xmm0, %xmm0 + %res = call float @llvm.sqrt.f32(float %a) + ret float %res +} +declare float 
@llvm.sqrt.f32(float) nounwind readnone + +define double @test_sqrt_f64(double %a) { +; SSE2-LABEL: test_sqrt_f64 +; SSE2: sqrtsd %xmm0, %xmm0 +; AVX-LABEL: test_sqrt_f64 +; AVX: vsqrtsd %xmm0, %xmm0 + %res = call double @llvm.sqrt.f64(double %a) + ret double %res +} +declare double @llvm.sqrt.f64(double) nounwind readnone + +