value operands and has the same type as the operands. The remainder has the
same sign as the dividend.
+'``llvm.experimental.constrained.fma``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ declare <type>
+ @llvm.experimental.constrained.fma(<type> <op1>, <type> <op2>, <type> <op3>,
+ metadata <rounding mode>,
+ metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.fma``' intrinsic returns the result of a
+fused-multiply-add operation on its operands.
+
+Arguments:
+""""""""""
+
+The first three arguments to the '``llvm.experimental.constrained.fma``'
+intrinsic must be :ref:`floating point <t_floating>` or :ref:`vector
+<t_vector>` of floating point values. All arguments must have identical types.
+
+The fourth and fifth arguments specify the rounding mode and exception behavior
+as described above.
+
+Semantics:
+""""""""""
+
+The result produced is the product of the first two operands added to the third
+operand computed with infinite precision, and then rounded to the target
+precision.
Constrained libm-equivalent Intrinsics
--------------------------------------
/// They are used to limit optimizations while the DAG is being
/// optimized.
STRICT_FADD, STRICT_FSUB, STRICT_FMUL, STRICT_FDIV, STRICT_FREM,
+ STRICT_FMA,
/// Constrained versions of libm-equivalent floating point intrinsics.
/// These will be lowered to the equivalent non-constrained pseudo-op
/// Test if this node is a strict floating point pseudo-op.
bool isStrictFPOpcode() {
switch (NodeType) {
- default:
+ default:
return false;
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
case ISD::STRICT_FDIV:
case ISD::STRICT_FREM:
+ case ISD::STRICT_FMA:
case ISD::STRICT_FSQRT:
case ISD::STRICT_FPOW:
case ISD::STRICT_FPOWI:
};
bool isUnaryOp() const;
+ bool isTernaryOp() const;
RoundingMode getRoundingMode() const;
ExceptionBehavior getExceptionBehavior() const;
case Intrinsic::experimental_constrained_fmul:
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
+ case Intrinsic::experimental_constrained_fma:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_pow:
case Intrinsic::experimental_constrained_powi:
llvm_metadata_ty,
llvm_metadata_ty ]>;
+ def int_experimental_constrained_fma : Intrinsic<[ llvm_anyfloat_ty ],
+ [ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ llvm_metadata_ty,
+ llvm_metadata_ty ]>;
+
// These intrinsics are sensitive to the rounding mode so we need constrained
// versions of each of them. When strict rounding and exception control are
// not required the non-constrained versions of these intrinsics should be
case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break;
case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break;
case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break;
+ case ISD::STRICT_FMA: EqOpc = ISD::FMA; break;
case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break;
case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break;
case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break;
}
break;
case ISD::STRICT_FSQRT:
+ case ISD::STRICT_FMA:
case ISD::STRICT_FPOW:
case ISD::STRICT_FPOWI:
case ISD::STRICT_FSIN:
// If the index is dependent on the store we will introduce a cycle when
// creating the load (the load uses the index, and by replacing the chain
// we will make the index dependent on the load). Also, the store might be
- // dependent on the extractelement and introduce a cycle when creating
+ // dependent on the extractelement and introduce a cycle when creating
// the load.
if (SDNode::hasPredecessorHelper(ST, Visited, Worklist) ||
ST->hasPredecessor(Op.getNode()))
Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
RTLIB::FMA_F80, RTLIB::FMA_F128,
RTLIB::FMA_PPCF128));
+      break;
+    case ISD::STRICT_FMA:
+      Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
+                                        RTLIB::FMA_F80, RTLIB::FMA_F128,
+                                        RTLIB::FMA_PPCF128));
break;
case ISD::FADD:
Results.push_back(ExpandFPLibCall(Node, RTLIB::ADD_F32, RTLIB::ADD_F64,
unsigned OrigOpc = Node->getOpcode();
unsigned NewOpc;
bool IsUnary = false;
+ bool IsTernary = false;
switch (OrigOpc) {
default:
llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break;
case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break;
case ISD::STRICT_FREM: NewOpc = ISD::FREM; break;
+ case ISD::STRICT_FMA: NewOpc = ISD::FMA; IsTernary = true; break;
case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break;
case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break;
case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break;
SDNode *Res = nullptr;
if (IsUnary)
Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) });
+ else if (IsTernary)
+ Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
+ Node->getOperand(2),
+ Node->getOperand(3)});
else
Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
Node->getOperand(2) });
case Intrinsic::experimental_constrained_fmul:
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
+ case Intrinsic::experimental_constrained_fma:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_pow:
case Intrinsic::experimental_constrained_powi:
case Intrinsic::experimental_constrained_frem:
Opcode = ISD::STRICT_FREM;
break;
+ case Intrinsic::experimental_constrained_fma:
+ Opcode = ISD::STRICT_FMA;
+ break;
case Intrinsic::experimental_constrained_sqrt:
Opcode = ISD::STRICT_FSQRT;
break;
SDVTList VTs = DAG.getVTList(ValueVTs);
SDValue Result;
if (FPI.isUnaryOp())
- Result = DAG.getNode(Opcode, sdl, VTs,
+ Result = DAG.getNode(Opcode, sdl, VTs,
{ Chain, getValue(FPI.getArgOperand(0)) });
+ else if (FPI.isTernaryOp())
+ Result = DAG.getNode(Opcode, sdl, VTs,
+ { Chain, getValue(FPI.getArgOperand(0)),
+ getValue(FPI.getArgOperand(1)),
+ getValue(FPI.getArgOperand(2)) });
else
- Result = DAG.getNode(Opcode, sdl, VTs,
+ Result = DAG.getNode(Opcode, sdl, VTs,
{ Chain, getValue(FPI.getArgOperand(0)),
getValue(FPI.getArgOperand(1)) });
// are all subclasses of the CallInst class. Note that none of these classes
// has state or virtual methods, which is an important part of this gross/neat
// hack working.
-//
+//
// In some cases, arguments to intrinsics need to be generic and are defined as
// type pointer to empty struct { }*. To access the real item of interest the
-// cast instruction needs to be stripped away.
+// cast instruction needs to be stripped away.
//
//===----------------------------------------------------------------------===//
ConstrainedFPIntrinsic::RoundingMode
ConstrainedFPIntrinsic::getRoundingMode() const {
unsigned NumOperands = getNumArgOperands();
- Metadata *MD =
+ Metadata *MD =
dyn_cast<MetadataAsValue>(getArgOperand(NumOperands - 2))->getMetadata();
if (!MD || !isa<MDString>(MD))
return rmInvalid;
ConstrainedFPIntrinsic::ExceptionBehavior
ConstrainedFPIntrinsic::getExceptionBehavior() const {
unsigned NumOperands = getNumArgOperands();
- Metadata *MD =
+ Metadata *MD =
dyn_cast<MetadataAsValue>(getArgOperand(NumOperands - 1))->getMetadata();
if (!MD || !isa<MDString>(MD))
return ebInvalid;
bool ConstrainedFPIntrinsic::isUnaryOp() const {
switch (getIntrinsicID()) {
- default:
+ default:
return false;
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_sin:
return true;
}
}
+
+// Returns true if this constrained FP intrinsic takes three floating point
+// value operands in addition to the rounding-mode and exception-behavior
+// metadata arguments. Currently only experimental.constrained.fma qualifies.
+bool ConstrainedFPIntrinsic::isTernaryOp() const {
+  switch (getIntrinsicID()) {
+  default:
+    return false;
+  case Intrinsic::experimental_constrained_fma:
+    return true;
+  }
+}
+
case Intrinsic::experimental_constrained_fmul:
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
+ case Intrinsic::experimental_constrained_fma:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_pow:
case Intrinsic::experimental_constrained_powi:
void Verifier::visitConstrainedFPIntrinsic(ConstrainedFPIntrinsic &FPI) {
unsigned NumOperands = FPI.getNumArgOperands();
- Assert(((NumOperands == 3 && FPI.isUnaryOp()) || (NumOperands == 4)),
- "invalid arguments for constrained FP intrinsic", &FPI);
+ Assert(((NumOperands == 5 && FPI.isTernaryOp()) ||
+ (NumOperands == 3 && FPI.isUnaryOp()) || (NumOperands == 4)),
+ "invalid arguments for constrained FP intrinsic", &FPI);
Assert(isa<MetadataAsValue>(FPI.getArgOperand(NumOperands-1)),
"invalid exception behavior argument", &FPI);
Assert(isa<MetadataAsValue>(FPI.getArgOperand(NumOperands-2)),
-; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck %s
+; RUN: llc -O3 -mtriple=x86_64-pc-linux < %s | FileCheck --check-prefix=COMMON --check-prefix=NO-FMA --check-prefix=FMACALL64 --check-prefix=FMACALL32 %s
+; RUN: llc -O3 -mtriple=x86_64-pc-linux -mattr=+fma < %s | FileCheck -check-prefix=COMMON --check-prefix=HAS-FMA --check-prefix=FMA64 --check-prefix=FMA32 %s
; Verify that constants aren't folded to inexact results when the rounding mode
; is unknown.
; }
;
; CHECK-LABEL: f1
-; CHECK: divsd
+; COMMON: divsd
define double @f1() {
entry:
%div = call double @llvm.experimental.constrained.fdiv.f64(
; }
;
; CHECK-LABEL: f2
-; CHECK: subsd
+; COMMON: subsd
define double @f2(double %a) {
entry:
%div = call double @llvm.experimental.constrained.fsub.f64(
; }
;
; CHECK-LABEL: f3:
-; CHECK: subsd
-; CHECK: mulsd
-; CHECK: subsd
+; COMMON: subsd
+; COMMON: mulsd
+; COMMON: subsd
define double @f3(double %a, double %b) {
entry:
%sub = call double @llvm.experimental.constrained.fsub.f64(
; return a;
; }
;
-;
+;
; CHECK-LABEL: f4:
-; CHECK: testl
-; CHECK: jle
-; CHECK: addsd
+; COMMON: testl
+; COMMON: jle
+; COMMON: addsd
define double @f4(i32 %n, double %a) {
entry:
%cmp = icmp sgt i32 %n, 0
; Verify that sqrt(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f5
-; CHECK: sqrtsd
+; COMMON: sqrtsd
define double @f5() {
entry:
%result = call double @llvm.experimental.constrained.sqrt.f64(double 42.0,
; Verify that pow(42.1, 3.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f6
-; CHECK: pow
+; COMMON: pow
define double @f6() {
entry:
%result = call double @llvm.experimental.constrained.pow.f64(double 42.1,
; Verify that powi(42.1, 3) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f7
-; CHECK: powi
+; COMMON: powi
define double @f7() {
entry:
%result = call double @llvm.experimental.constrained.powi.f64(double 42.1,
; Verify that sin(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f8
-; CHECK: sin
+; COMMON: sin
define double @f8() {
entry:
%result = call double @llvm.experimental.constrained.sin.f64(double 42.0,
; Verify that cos(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f9
-; CHECK: cos
+; COMMON: cos
define double @f9() {
entry:
%result = call double @llvm.experimental.constrained.cos.f64(double 42.0,
; Verify that exp(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f10
-; CHECK: exp
+; COMMON: exp
define double @f10() {
entry:
%result = call double @llvm.experimental.constrained.exp.f64(double 42.0,
; Verify that exp2(42.1) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f11
-; CHECK: exp2
+; COMMON: exp2
define double @f11() {
entry:
%result = call double @llvm.experimental.constrained.exp2.f64(double 42.1,
; Verify that log(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f12
-; CHECK: log
+; COMMON: log
define double @f12() {
entry:
%result = call double @llvm.experimental.constrained.log.f64(double 42.0,
; Verify that log10(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f13
-; CHECK: log10
+; COMMON: log10
define double @f13() {
entry:
%result = call double @llvm.experimental.constrained.log10.f64(double 42.0,
; Verify that log2(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f14
-; CHECK: log2
+; COMMON: log2
define double @f14() {
entry:
%result = call double @llvm.experimental.constrained.log2.f64(double 42.0,
; Verify that rint(42.1) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f15
-; CHECK: rint
+; NO-FMA: rint
+; HAS-FMA: vroundsd
define double @f15() {
entry:
%result = call double @llvm.experimental.constrained.rint.f64(double 42.1,
; Verify that nearbyint(42.1) isn't simplified when the rounding mode is
; unknown.
; CHECK-LABEL: f16
-; CHECK: nearbyint
+; NO-FMA: nearbyint
+; HAS-FMA: vroundsd
define double @f16() {
entry:
%result = call double @llvm.experimental.constrained.nearbyint.f64(
ret double %result
}
+; Verify that fma(3.5, 3.5, 3.5) isn't simplified when the rounding mode is
+; unknown.
+; COMMON-LABEL: f17
+; FMACALL32: jmp fmaf # TAILCALL
+; FMA32: vfmadd213ss
+define float @f17() {
+entry:
+ %result = call float @llvm.experimental.constrained.fma.f32(
+ float 3.5,
+ float 3.5,
+ float 3.5,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %result
+}
+
+; Verify that fma(42.1, 42.1, 42.1) isn't simplified when the rounding mode is
+; unknown.
+; COMMON-LABEL: f18
+; FMACALL64: jmp fma # TAILCALL
+; FMA64: vfmadd213sd
+define double @f18() {
+entry:
+ %result = call double @llvm.experimental.constrained.fma.f64(
+ double 42.1,
+ double 42.1,
+ double 42.1,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %result
+}
+
@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.log2.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
+declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
+declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
; return a;
; }
;
-;
+;
; CHECK-LABEL: @f4
; CHECK-NOT: select
; CHECK: br i1 %cmp
ret double %a.0
}
-
; Verify that sqrt(42.0) isn't simplified when the rounding mode is unknown.
; CHECK-LABEL: f5
; CHECK: call double @llvm.experimental.constrained.sqrt
ret double %result
}
+; Verify that fma(42.1, 42.1, 42.1) isn't simplified when the rounding mode is
+; unknown.
+; CHECK-LABEL: f17
+; CHECK: call double @llvm.experimental.constrained.fma
+define double @f17() {
+entry:
+ %result = call double @llvm.experimental.constrained.fma.f64(double 42.1, double 42.1, double 42.1,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %result
+}
+
@llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
declare double @llvm.experimental.constrained.log2.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)