From f61ad6ec119554f251f44f54c9cf42d8b713866e Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 3 Jan 2017 04:32:35 +0000
Subject: [PATCH] InstCombine: Add fma with constant transforms

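Canonicalize a constant first multiplicand of fma/fmuladd into the
second operand, and fold a multiply by 1.0 (fma x, 1.0, z) into
fadd x, z, preserving the call's fast-math flags.

DAGCombine already does these.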

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@290860 91177308-0d34-0410-b5e6-96231b3b80d8
---
 .../InstCombine/InstCombineCalls.cpp          | 20 ++++-
 test/Transforms/InstCombine/fma.ll            | 75 ++++++++++++++++++-
 2 files changed, 90 insertions(+), 5 deletions(-)
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 029976e3133..e80b55320e7 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1583,12 +1583,19 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
   }
   case Intrinsic::fma:
   case Intrinsic::fmuladd: {
-    Value *LHS = nullptr;
-    Value *RHS = nullptr;
-
     Value *Src0 = II->getArgOperand(0);
     Value *Src1 = II->getArgOperand(1);
 
+    // Canonicalize constants into the RHS.
+    if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
+      II->setArgOperand(0, Src1);
+      II->setArgOperand(1, Src0);
+      std::swap(Src0, Src1);
+    }
+
+    Value *LHS = nullptr;
+    Value *RHS = nullptr;
+
     // fma fneg(x), fneg(y), z -> fma x, y, z
     if (match(Src0, m_FNeg(m_Value(LHS))) &&
         match(Src1, m_FNeg(m_Value(RHS)))) {
@@ -1609,6 +1616,13 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
       return replaceInstUsesWith(*II, NewCall);
     }
 
+    // fma x, 1, z -> fadd x, z
+    if (match(Src1, m_FPOne())) {
+      Instruction *RI = BinaryOperator::CreateFAdd(Src0, II->getArgOperand(2));
+      RI->copyFastMathFlags(II);
+      return RI;
+    }
+
     break;
   }
   case Intrinsic::ppc_altivec_lvx:
diff --git a/test/Transforms/InstCombine/fma.ll b/test/Transforms/InstCombine/fma.ll
index 78e3d63b396..e41f1e7edd4 100644
--- a/test/Transforms/InstCombine/fma.ll
+++ b/test/Transforms/InstCombine/fma.ll
@@ -1,6 +1,7 @@
 ; RUN: opt -S -instcombine < %s | FileCheck %s
 
 declare float @llvm.fma.f32(float, float, float) #1
+declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) #1
 declare float @llvm.fmuladd.f32(float, float, float) #1
 declare float @llvm.fabs.f32(float) #1
 
@@ -25,7 +26,7 @@ define float @fma_fneg_x_fneg_y_fast(float %x, float %y, float %z) {
 }
 
 ; CHECK-LABEL: @fma_fneg_const_fneg_y(
-; CHECK: %fma = call float @llvm.fma.f32(float bitcast (i32 ptrtoint (i32* @external to i32) to float), float %y, float %z)
+; CHECK: %fma = call float @llvm.fma.f32(float %y, float bitcast (i32 ptrtoint (i32* @external to i32) to float), float %z)
 define float @fma_fneg_const_fneg_y(float %y, float %z) {
   %y.fneg = fsub float -0.0, %y
   %fma = call float @llvm.fma.f32(float fsub (float -0.0, float bitcast (i32 ptrtoint (i32* @external to i32) to float)), float %y.fneg, float %z)
@@ -86,7 +87,7 @@ define float @fmuladd_fneg_x_fneg_y_fast(float %x, float %y, float %z) {
 }
 
 ; CHECK-LABEL: @fmuladd_fneg_const_fneg_y(
-; CHECK: %fmuladd = call float @llvm.fmuladd.f32(float bitcast (i32 ptrtoint (i32* @external to i32) to float), float %y, float %z)
+; CHECK: %fmuladd = call float @llvm.fmuladd.f32(float %y, float bitcast (i32 ptrtoint (i32* @external to i32) to float), float %z)
 define float @fmuladd_fneg_const_fneg_y(float %y, float %z) {
   %y.fneg = fsub float -0.0, %y
   %fmuladd = call float @llvm.fmuladd.f32(float fsub (float -0.0, float bitcast (i32 ptrtoint (i32* @external to i32) to float)), float %y.fneg, float %z)
@@ -128,5 +129,75 @@ define float @fmuladd_fabs_x_fabs_x_fast(float %x, float %z) {
   ret float %fmuladd
 }
 
+; CHECK-LABEL: @fma_k_y_z(
+; CHECK: %fma = call float @llvm.fma.f32(float %y, float 4.000000e+00, float %z)
+define float @fma_k_y_z(float %y, float %z) {
+  %fma = call float @llvm.fma.f32(float 4.0, float %y, float %z)
+  ret float %fma
+}
+
+; CHECK-LABEL: @fma_k_y_z_fast(
+; CHECK: %fma = call fast float @llvm.fma.f32(float %y, float 4.000000e+00, float %z)
+define float @fma_k_y_z_fast(float %y, float %z) {
+  %fma = call fast float @llvm.fma.f32(float 4.0, float %y, float %z)
+  ret float %fma
+}
+
+; CHECK-LABEL: @fmuladd_k_y_z_fast(
+; CHECK: %fmuladd = call fast float @llvm.fmuladd.f32(float %y, float 4.000000e+00, float %z)
+define float @fmuladd_k_y_z_fast(float %y, float %z) {
+  %fmuladd = call fast float @llvm.fmuladd.f32(float 4.0, float %y, float %z)
+  ret float %fmuladd
+}
+
+; CHECK-LABEL: @fma_1_y_z(
+; CHECK: %fma = fadd float %y, %z
+define float @fma_1_y_z(float %y, float %z) {
+  %fma = call float @llvm.fma.f32(float 1.0, float %y, float %z)
+  ret float %fma
+}
+
+; CHECK-LABEL: @fma_x_1_z(
+; CHECK: %fma = fadd float %x, %z
+define float @fma_x_1_z(float %x, float %z) {
+  %fma = call float @llvm.fma.f32(float %x, float 1.0, float %z)
+  ret float %fma
+}
+
+; CHECK-LABEL: @fma_x_1_z_v2f32(
+; CHECK: %fma = fadd <2 x float> %x, %z
+define <2 x float> @fma_x_1_z_v2f32(<2 x float> %x, <2 x float> %z) {
+  %fma = call <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> <float 1.0, float 1.0>, <2 x float> %z)
+  ret <2 x float> %fma
+}
+
+; CHECK-LABEL: @fma_x_1_2_z_v2f32(
+; CHECK: %fma = call <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> <float 1.000000e+00, float 2.000000e+00>, <2 x float> %z)
+define <2 x float> @fma_x_1_2_z_v2f32(<2 x float> %x, <2 x float> %z) {
+  %fma = call <2 x float> @llvm.fma.v2f32(<2 x float> %x, <2 x float> <float 1.0, float 2.0>, <2 x float> %z)
+  ret <2 x float> %fma
+}
+
+; CHECK-LABEL: @fma_x_1_z_fast(
+; CHECK: %fma = fadd fast float %x, %z
+define float @fma_x_1_z_fast(float %x, float %z) {
+  %fma = call fast float @llvm.fma.f32(float %x, float 1.0, float %z)
+  ret float %fma
+}
+
+; CHECK-LABEL: @fma_1_1_z(
+; CHECK: %fma = fadd float %z, 1.000000e+00
+define float @fma_1_1_z(float %z) {
+  %fma = call float @llvm.fma.f32(float 1.0, float 1.0, float %z)
+  ret float %fma
+}
+
+; CHECK-LABEL: @fmuladd_x_1_z_fast(
+; CHECK: %fmuladd = fadd fast float %x, %z
+define float @fmuladd_x_1_z_fast(float %x, float %z) {
+  %fmuladd = call fast float @llvm.fmuladd.f32(float %x, float 1.0, float %z)
+  ret float %fmuladd
+}
+
 attributes #0 = { nounwind }
 attributes #1 = { nounwind readnone }
-- 
2.49.0