value operands and has the same type as the operands. The remainder has the
same sign as the dividend.
+'``llvm.experimental.constrained.fma``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare <type>
+ @llvm.experimental.constrained.fma(<type> <op1>, <type> <op2>, <type> <op3>,
+ metadata <rounding mode>,
+ metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.fma``' intrinsic returns the result of a
+fused-multiply-add operation on its operands.
+
+Arguments:
+""""""""""
+
+The first three arguments to the '``llvm.experimental.constrained.fma``'
+intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector
+<t_vector>` of floating point values. All arguments must have identical types.
+
+The fourth and fifth arguments specify the rounding mode and exception behavior
+as described above.
+
+Semantics:
+""""""""""
+
+The result produced is the product of the first two operands added to the third
+operand computed with infinite precision, and then rounded to the target
+precision.
Constrained libm-equivalent Intrinsics
--------------------------------------
/// They are used to limit optimizations while the DAG is being
/// optimized.
STRICT_FADD, STRICT_FSUB, STRICT_FMUL, STRICT_FDIV, STRICT_FREM,
+ STRICT_FMA,
/// Constrained versions of libm-equivalent floating point intrinsics.
/// These will be lowered to the equivalent non-constrained pseudo-op
/// Test if this node is a strict floating point pseudo-op.
bool isStrictFPOpcode() {
switch (NodeType) {
- default:
+ default:
return false;
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
case ISD::STRICT_FDIV:
case ISD::STRICT_FREM:
+ case ISD::STRICT_FMA:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FPOW:
case ISD::STRICT_FPOWI:
};
bool isUnaryOp() const;
+ bool isTernaryOp() const;
RoundingMode getRoundingMode() const;
ExceptionBehavior getExceptionBehavior() const;
case Intrinsic::experimental_constrained_fmul:
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
+ case Intrinsic::experimental_constrained_fma:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_pow:
case Intrinsic::experimental_constrained_powi:
llvm_metadata_ty,
llvm_metadata_ty ]>;
+ def int_experimental_constrained_fma : Intrinsic<[ llvm_anyfloat_ty ],
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
+
// These intrinsics are sensitive to the rounding mode so we need constrained
// versions of each of them. When strict rounding and exception control are
// not required the non-constrained versions of these intrinsics should be
case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break;
case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break;
case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break;
+ case ISD::STRICT_FMA: EqOpc = ISD::FMA; break;
case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break;
case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break;
case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break;
}
break;
case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
case ISD::STRICT_FPOW:
case ISD::STRICT_FPOWI:
case ISD::STRICT_FSIN:
// If the index is dependent on the store we will introduce a cycle when
// creating the load (the load uses the index, and by replacing the chain
// we will make the index dependent on the load). Also, the store might be
- // dependent on the extractelement and introduce a cycle when creating
+ // dependent on the extractelement and introduce a cycle when creating
// the load.
if (SDNode::hasPredecessorHelper(ST, Visited, Worklist) ||
ST->hasPredecessor(Op.getNode()))
Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
RTLIB::FMA_F80, RTLIB::FMA_F128,
RTLIB::FMA_PPCF128));
+      break;
+    case ISD::STRICT_FMA:
+      Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
+                                        RTLIB::FMA_F80, RTLIB::FMA_F128,
+                                        RTLIB::FMA_PPCF128));
break;
case ISD::FADD:
Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
unsigned OrigOpc = Node->getOpcode();
unsigned NewOpc;
bool IsUnary = false;
+ bool IsTernary = false;
switch (OrigOpc) {
default:
llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break;
case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break;
case ISD::STRICT_FREM: NewOpc = ISD::FREM; break;
+ case ISD::STRICT_FMA: NewOpc = ISD::FMA; IsTernary = true; break;
case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break;
case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break;
case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break;
SDNode *Res = nullptr;
if (IsUnary)
Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) });
+ else if (IsTernary)
+ Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
+ Node->getOperand(2),
+ Node->getOperand(3)});
else
Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
Node->getOperand(2) });
case Intrinsic::experimental_constrained_fmul:
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
+ case Intrinsic::experimental_constrained_fma:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_pow:
case Intrinsic::experimental_constrained_powi:
case Intrinsic::experimental_constrained_frem:
Opcode = ISD::STRICT_FREM;
break;
+ case Intrinsic::experimental_constrained_fma:
+ Opcode = ISD::STRICT_FMA;
+ break;
case Intrinsic::experimental_constrained_sqrt:
Opcode = ISD::STRICT_FSQRT;
break;
SDVTList VTs = DAG.getVTList(ValueVTs);
SDValue Result;
if (FPI.isUnaryOp())
- Result = DAG.getNode(Opcode, sdl, VTs,
+ Result = DAG.getNode(Opcode, sdl, VTs,
{ Chain, getValue(FPI.getArgOperand(0)) });
+ else if (FPI.isTernaryOp())
+ Result = DAG.getNode(Opcode, sdl, VTs,
+ { Chain, getValue(FPI.getArgOperand(0)),
+ getValue(FPI.getArgOperand(1)),
+ getValue(FPI.getArgOperand(2)) });
else
- Result = DAG.getNode(Opcode, sdl, VTs,
+ Result = DAG.getNode(Opcode, sdl, VTs,
{ Chain, getValue(FPI.getArgOperand(0)),
getValue(FPI.getArgOperand(1)) });
// are all subclasses of the CallInst class. Note that none of these classes
// has state or virtual methods, which is an important part of this gross/neat
// hack working.
-//
+//
// In some cases, arguments to intrinsics need to be generic and are defined as
// type pointer to empty struct { }*. To access the real item of interest the
-// cast instruction needs to be stripped away.
+// cast instruction needs to be stripped away.
//
//===----------------------------------------------------------------------===//
ConstrainedFPIntrinsic::RoundingMode
ConstrainedFPIntrinsic::getRoundingMode() const {
unsigned NumOperands = getNumArgOperands();
- Metadata *MD =
+ Metadata *MD =
dyn_cast<MetadataAsValue>(getArgOperand(NumOperands - 2))->getMetadata();
if (!MD || !isa<MDString>(MD))
return rmInvalid;
ConstrainedFPIntrinsic::ExceptionBehavior
ConstrainedFPIntrinsic::getExceptionBehavior() const {
unsigned NumOperands = getNumArgOperands();
- Metadata *MD =
+ Metadata *MD =
dyn_cast<MetadataAsValue>(getArgOperand(NumOperands - 1))->getMetadata();
if (!MD || !isa<MDString>(MD))
return ebInvalid;
bool ConstrainedFPIntrinsic::isUnaryOp() const {
switch (getIntrinsicID()) {
- default:
+ default:
return false;
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_sin:
return true;
}
}
+
+// Returns true if this constrained FP intrinsic takes three floating point
+// value operands in addition to the rounding-mode and exception-behavior
+// metadata arguments. Currently only experimental.constrained.fma qualifies.
+bool ConstrainedFPIntrinsic::isTernaryOp() const {
+  switch (getIntrinsicID()) {
+  default:
+    return false;
+  case Intrinsic::experimental_constrained_fma:
+    return true;
+  }
+}
+
case Intrinsic::experimental_constrained_fmul:
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
+ case Intrinsic::experimental_constrained_fma:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_pow:
case Intrinsic::experimental_constrained_powi:
void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
unsigned NumOperands = FPI.getNumArgOperands();
- Assert(((NumOperands == 3 && FPI.isUnaryOp()) || (NumOperands == 4)),
- "invalid arguments for constrained FP intrinsic", &FPI);
+ Assert(((NumOperands == 5 && FPI.isTernaryOp()) ||
+ (NumOperands == 3 && FPI.isUnaryOp()) || (NumOperands == 4)),
+ "invalid arguments for constrained FP intrinsic", &FPI);
Assert(isa<MetadataAsValue>(FPI.getArgOperand(NumOperands-1)),
"invalid exception behavior argument", &FPI);
Assert(isa<MetadataAsValue>(FPI.getArgOperand(NumOperands-2)),
-; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck %s
+; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck --check-prefix=COMMON --check-prefix=NO-FMA --check-prefix=FMACALL64 --check-prefix=FMACALL32 %s
+; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+fma < %s | FileCheck -check-prefix=COMMON --check-prefix=HAS-FMA --check-prefix=FMA64 --check-prefix=FMA32 %s
; Verify that constants aren't folded to inexact results when the rounding mode
; is unknown.
; }
;
; CHECK-LABEL: f1
-; CHECK: divsd
+; COMMON: divsd
define double @f1() {
entry:
%div = call double @llvm.experimental.constrained.fdiv.f64(
; }
;
; CHECK-LABEL: f2
-; CHECK: subsd
+; COMMON: subsd
define double @f2(double %a) {
entry:
%div = call double @llvm.experimental.constrained.fsub.f64(
; }
;
; CHECK-LABEL: f3:
-; CHECK: subsd
-; CHECK: mulsd
-; CHECK: subsd
+; COMMON: subsd
+; COMMON: mulsd
+; COMMON: subsd
define double @f3(double %a, double %b) {
entry:
%sub = call double @llvm.experimental.constrained.fsub.f64(
; return a;
; }
;
-;
+;
; CHECK-LABEL: f4:
-; CHECK: testl
-; CHECK: jle
-; CHECK: addsd
+; COMMON: testl
+; COMMON: jle
+; COMMON: addsd
define double @f4(i32 %n, double %a) {
entry:
%cmp = icmp sgt i32 %n, 0
; Verify that sqrt(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f5
-; CHECK: sqrtsd
+; COMMON: sqrtsd
define double @f5() {
entry:
%result = call double @llvm.experimental.constrained.sqrt.f64(double 42.0,
; Verify that pow(42.1, 3.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f6
-; CHECK: pow
+; COMMON: pow
define double @f6() {
entry:
%result = call double @llvm.experimental.constrained.pow.f64(double 42.1,
; Verify that powi(42.1, 3) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f7
-; CHECK: powi
+; COMMON: powi
define double @f7() {
entry:
%result = call double @llvm.experimental.constrained.powi.f64(double 42.1,
; Verify that sin(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f8
-; CHECK: sin
+; COMMON: sin
define double @f8() {
entry:
%result = call double @llvm.experimental.constrained.sin.f64(double 42.0,
; Verify that cos(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f9
-; CHECK: cos
+; COMMON: cos
define double @f9() {
entry:
%result = call double @llvm.experimental.constrained.cos.f64(double 42.0,
; Verify that exp(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f10
-; CHECK: exp
+; COMMON: exp
define double @f10() {
entry:
%result = call double @llvm.experimental.constrained.exp.f64(double 42.0,
; Verify that exp2(42.1) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f11
-; CHECK: exp2
+; COMMON: exp2
define double @f11() {
entry:
%result = call double @llvm.experimental.constrained.exp2.f64(double 42.1,
; Verify that log(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f12
-; CHECK: log
+; COMMON: log
define double @f12() {
entry:
%result = call double @llvm.experimental.constrained.log.f64(double 42.0,
; Verify that log10(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f13
-; CHECK: log10
+; COMMON: log10
define double @f13() {
entry:
%result = call double @llvm.experimental.constrained.log10.f64(double 42.0,
; Verify that log2(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f14
-; CHECK: log2
+; COMMON: log2
define double @f14() {
entry:
%result = call double @llvm.experimental.constrained.log2.f64(double 42.0,
; Verify that rint(42.1) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f15
-; CHECK: rint
+; NO-FMA: rint
+; HAS-FMA: vroundsd
define double @f15() {
entry:
%result = call double @llvm.experimental.constrained.rint.f64(double 42.1,
; Verify that nearbyint(42.1) isn't simplified when the rounding mode is
; unknown.
; CHECK-LABEL: f16
-; CHECK: nearbyint
+; NO-FMA: nearbyint
+; HAS-FMA: vroundsd
define double @f16() {
entry:
%result = call double @llvm.experimental.constrained.nearbyint.f64(
ret double %result
}
+; Verify that fma(3.5, 3.5, 3.5) isn't simplified when the rounding mode is
+; unknown.
+; COMMON-LABEL: f17
+; FMACALL32: jmp fmaf # TAILCALL
+; FMA32: vfmadd213ss
+define float @f17() {
+entry:
+ %result = call float @llvm.experimental.constrained.fma.f32(
+ float 3.5,
+ float 3.5,
+ float 3.5,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %result
+}
+
+; Verify that fma(42.1, 42.1, 42.1) isn't simplified when the rounding mode is
+; unknown.
+; COMMON-LABEL: f18
+; FMACALL64: jmp fma # TAILCALL
+; FMA64: vfmadd213sd
+define double @f18() {
+entry:
+ %result = call double @llvm.experimental.constrained.fma.f64(
+ double 42.1,
+ double 42.1,
+ double 42.1,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %result
+}
+
@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.log2.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
+declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
+declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
; return a;
; }
;
-;
+;
; CHECK-LABEL: @f4
; CHECK-NOT: select
; CHECK: br i1 %cmp
ret double %a.0
}
-
; Verify that sqrt(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f5
; CHECK: call double @llvm.experimental.constrained.sqrt
ret double %result
}
+; Verify that fma(42.1, 42.1, 42.1) isn't simplified when the rounding mode is
+; unknown.
+; CHECK-LABEL: f17
+; CHECK: call double @llvm.experimental.constrained.fma
+define double @f17() {
+entry:
+ %result = call double @llvm.experimental.constrained.fma.f64(double 42.1, double 42.1, double 42.1,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %result
+}
+
@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.log2.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)