From: Aditya Nandakumar Date: Tue, 20 Jun 2017 19:25:23 +0000 (+0000) Subject: [GISel]: Add G_FMA opcode for fused multiply adds X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c7c608fe5e6045aacab77b61cf8bc58c132466fe;p=llvm [GISel]: Add G_FMA opcode for fused multiply adds https://reviews.llvm.org/D34372 Reviewed by dsanders git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@305824 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Target/GenericOpcodes.td b/include/llvm/Target/GenericOpcodes.td index de3796cd4ee..ed31e84046c 100644 --- a/include/llvm/Target/GenericOpcodes.td +++ b/include/llvm/Target/GenericOpcodes.td @@ -386,6 +386,14 @@ def G_FMUL : Instruction { let isCommutable = 1; } +// Generic fused multiply-add instruction. +def G_FMA : Instruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3); + let hasSideEffects = 0; + let isCommutable = 0; +} + // Generic FP division. def G_FDIV : Instruction { let OutOperandList = (outs type0:$dst); diff --git a/include/llvm/Target/TargetOpcodes.def b/include/llvm/Target/TargetOpcodes.def index 36764249632..e2a67a9460d 100644 --- a/include/llvm/Target/TargetOpcodes.def +++ b/include/llvm/Target/TargetOpcodes.def @@ -359,6 +359,9 @@ HANDLE_TARGET_OPCODE(G_FSUB) /// Generic FP multiplication. HANDLE_TARGET_OPCODE(G_FMUL) +/// Generic fused multiply-add. Behaves like the llvm.fma intrinsic. +HANDLE_TARGET_OPCODE(G_FMA) + /// Generic FP division. 
HANDLE_TARGET_OPCODE(G_FDIV) diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp index dccd8e0706c..7fefbf90922 100644 --- a/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -686,6 +686,13 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID, .addUse(getOrCreateVReg(*CI.getArgOperand(0))) .addUse(getOrCreateVReg(*CI.getArgOperand(1))); return true; + case Intrinsic::fma: + MIRBuilder.buildInstr(TargetOpcode::G_FMA) + .addDef(getOrCreateVReg(CI)) + .addUse(getOrCreateVReg(*CI.getArgOperand(0))) + .addUse(getOrCreateVReg(*CI.getArgOperand(1))) + .addUse(getOrCreateVReg(*CI.getArgOperand(2))); + return true; case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: diff --git a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll index 65b8ba57070..e07d5ad8410 100644 --- a/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll +++ b/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator.ll @@ -1247,6 +1247,18 @@ define float @test_pow_intrin(float %l, float %r) { ret float %res } +declare float @llvm.fma.f32(float, float, float) +define float @test_fma_intrin(float %a, float %b, float %c) { +; CHECK-LABEL: name: test_fma_intrin +; CHECK: [[A:%[0-9]+]](s32) = COPY %s0 +; CHECK: [[B:%[0-9]+]](s32) = COPY %s1 +; CHECK: [[C:%[0-9]+]](s32) = COPY %s2 +; CHECK: [[RES:%[0-9]+]](s32) = G_FMA [[A]], [[B]], [[C]] +; CHECK: %s0 = COPY [[RES]] + %res = call float @llvm.fma.f32(float %a, float %b, float %c) + ret float %res +} + declare void @llvm.lifetime.start.p0i8(i64, i8*) declare void @llvm.lifetime.end.p0i8(i64, i8*) define void @test_lifetime_intrin() {