// no unsigned wrap.
NoSWrap = 1 << 12, // Instruction supports binary operator
// no signed wrap.
- IsExact = 1 << 13 // Instruction supports division is
+ IsExact = 1 << 13, // Instruction supports division is
// known to be exact.
+ FPExcept = 1 << 14, // Instruction may raise floating-point
+ // exceptions.
};
private:
return mayLoad(Type) || mayStore(Type);
}
+ /// Return true if this instruction could possibly raise a floating-point
+ /// exception. This is the case if the instruction is a floating-point
+ /// instruction that can in principle raise an exception, as indicated
+ /// by the MCID::MayRaiseFPException property, *and* at the same time,
+  /// the instruction is used in a context where floating-point exceptions
+  /// may be enabled, as indicated by the FPExcept MI flag.
+ bool mayRaiseFPException() const {
+ return hasProperty(MCID::MayRaiseFPException) &&
+ getFlag(MachineInstr::MIFlag::FPExcept);
+ }
+
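As a side note on how this query is meant to be consumed: any pass that speculates, hoists, or reorders instructions should treat an exception-raising FP instruction like one with unmodeled side effects. A minimal sketch, not part of the patch (the helper name isSafeToSpeculate is invented for the example; the MachineInstr methods are the real ones):

    static bool isSafeToSpeculate(const MachineInstr &MI) {
      // Raising an FP exception updates the FP status flags, an observable
      // side effect, so refuse to move such an instruction.
      if (MI.mayRaiseFPException() || MI.hasUnmodeledSideEffects())
        return false;
      return !MI.isCall() && !MI.mayLoadOrStore();
    }

The ImplicitNullChecks::canHandle and rematerialization hunks below apply exactly this kind of guard.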
//===--------------------------------------------------------------------===//
// Flags that indicate whether an instruction can be modified by a method.
//===--------------------------------------------------------------------===//
bool ApproximateFuncs : 1;
bool AllowReassociation : 1;
+ // We assume instructions do not raise floating-point exceptions by default,
+ // and only those marked explicitly may do so. We could choose to represent
+  // this via a positive "FPExcept" flag as on the MI level, but having a
+ // negative "NoFPExcept" flag here (that defaults to true) makes the flag
+ // intersection logic more straightforward.
+ bool NoFPExcept : 1;
+
public:
/// Default constructor turns off all optimization flags.
SDNodeFlags()
Exact(false), NoNaNs(false), NoInfs(false),
NoSignedZeros(false), AllowReciprocal(false), VectorReduction(false),
AllowContract(false), ApproximateFuncs(false),
- AllowReassociation(false) {}
+ AllowReassociation(false), NoFPExcept(true) {}
/// Propagate the fast-math-flags from an IR FPMathOperator.
void copyFMF(const FPMathOperator &FPMO) {
setDefined();
AllowReassociation = b;
}
+ void setFPExcept(bool b) {
+ setDefined();
+ NoFPExcept = !b;
+ }
// These are accessors for each flag.
bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
bool hasAllowContract() const { return AllowContract; }
bool hasApproximateFuncs() const { return ApproximateFuncs; }
bool hasAllowReassociation() const { return AllowReassociation; }
+ bool hasFPExcept() const { return !NoFPExcept; }
bool isFast() const {
- return NoSignedZeros && AllowReciprocal && NoNaNs && NoInfs &&
+ return NoSignedZeros && AllowReciprocal && NoNaNs && NoInfs && NoFPExcept &&
AllowContract && ApproximateFuncs && AllowReassociation;
}
AllowContract &= Flags.AllowContract;
ApproximateFuncs &= Flags.ApproximateFuncs;
AllowReassociation &= Flags.AllowReassociation;
+ NoFPExcept &= Flags.NoFPExcept;
}
};
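The negative encoding pays off in the flag-intersection helper whose body appears just above: ANDing the NoFPExcept bits directly yields the conservative result that the merged flags may raise an exception if either input may. A minimal sketch of the intended behavior, assuming the helper is SDNodeFlags::intersectWith as in current trees:

    SDNodeFlags A, B;
    A.setFPExcept(true);   // A may raise FP exceptions (NoFPExcept = false)
    B.setFPExcept(false);  // B is known not to raise   (NoFPExcept = true)
    A.intersectWith(B);    // NoFPExcept becomes false & true == false
    assert(A.hasFPExcept() && "merged flags stay conservative");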
FoldableAsLoad,
MayLoad,
MayStore,
+ MayRaiseFPException,
Predicable,
NotDuplicable,
UnmodeledSideEffects,
/// may not actually modify anything, for example.
bool mayStore() const { return Flags & (1ULL << MCID::MayStore); }
+ /// Return true if this instruction may raise a floating-point exception.
+ bool mayRaiseFPException() const {
+ return Flags & (1ULL << MCID::MayRaiseFPException);
+ }
+
/// Return true if this instruction has side
/// effects that are not modeled by other flags. This does not return true
/// for instructions whose effects are captured by:
bit canFoldAsLoad = 0; // Can this be folded as a simple memory operand?
bit mayLoad = ?; // Is it possible for this inst to read memory?
bit mayStore = ?; // Is it possible for this inst to write memory?
+ bit mayRaiseFPException = 0; // Can this raise a floating-point exception?
bit isConvertibleToThreeAddress = 0; // Can this 2-addr instruction promote?
bit isCommutable = 0; // Is this 3 operand instruction commutable?
bit isTerminator = 0; // Is this part of the terminator for a basic block?
def f16_to_fp : SDNode<"ISD::FP16_TO_FP" , SDTIntToFPOp>;
def fp_to_f16 : SDNode<"ISD::FP_TO_FP16" , SDTFPToIntOp>;
+def strict_fadd : SDNode<"ISD::STRICT_FADD",
+ SDTFPBinOp, [SDNPHasChain, SDNPCommutative]>;
+def strict_fsub : SDNode<"ISD::STRICT_FSUB",
+ SDTFPBinOp, [SDNPHasChain]>;
+def strict_fmul : SDNode<"ISD::STRICT_FMUL",
+ SDTFPBinOp, [SDNPHasChain, SDNPCommutative]>;
+def strict_fdiv : SDNode<"ISD::STRICT_FDIV",
+ SDTFPBinOp, [SDNPHasChain]>;
+def strict_frem : SDNode<"ISD::STRICT_FREM",
+ SDTFPBinOp, [SDNPHasChain]>;
+def strict_fma : SDNode<"ISD::STRICT_FMA",
+ SDTFPTernaryOp, [SDNPHasChain]>;
+def strict_fsqrt : SDNode<"ISD::STRICT_FSQRT",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fsin : SDNode<"ISD::STRICT_FSIN",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fcos : SDNode<"ISD::STRICT_FCOS",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fexp2 : SDNode<"ISD::STRICT_FEXP2",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fpow : SDNode<"ISD::STRICT_FPOW",
+ SDTFPBinOp, [SDNPHasChain]>;
+def strict_flog2 : SDNode<"ISD::STRICT_FLOG2",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_frint : SDNode<"ISD::STRICT_FRINT",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fnearbyint : SDNode<"ISD::STRICT_FNEARBYINT",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fceil : SDNode<"ISD::STRICT_FCEIL",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_ffloor : SDNode<"ISD::STRICT_FFLOOR",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fround : SDNode<"ISD::STRICT_FROUND",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_ftrunc : SDNode<"ISD::STRICT_FTRUNC",
+ SDTFPUnaryOp, [SDNPHasChain]>;
+def strict_fminnum : SDNode<"ISD::STRICT_FMINNUM",
+ SDTFPBinOp, [SDNPHasChain,
+ SDNPCommutative, SDNPAssociative]>;
+def strict_fmaxnum : SDNode<"ISD::STRICT_FMAXNUM",
+ SDTFPBinOp, [SDNPHasChain,
+ SDNPCommutative, SDNPAssociative]>;
+def strict_fpround : SDNode<"ISD::STRICT_FP_ROUND",
+ SDTFPRoundOp, [SDNPHasChain]>;
+def strict_fpextend : SDNode<"ISD::STRICT_FP_EXTEND",
+ SDTFPExtendOp, [SDNPHasChain]>;
+
def setcc : SDNode<"ISD::SETCC" , SDTSetCC>;
def select : SDNode<"ISD::SELECT" , SDTSelect>;
def vselect : SDNode<"ISD::VSELECT" , SDTVSelect>;
def setne : PatFrag<(ops node:$lhs, node:$rhs),
(setcc node:$lhs, node:$rhs, SETNE)>;
+// Convenience fragments to match both strict and non-strict FP operations.
+def any_fadd : PatFrags<(ops node:$lhs, node:$rhs),
+ [(strict_fadd node:$lhs, node:$rhs),
+ (fadd node:$lhs, node:$rhs)]>;
+def any_fsub : PatFrags<(ops node:$lhs, node:$rhs),
+ [(strict_fsub node:$lhs, node:$rhs),
+ (fsub node:$lhs, node:$rhs)]>;
+def any_fmul : PatFrags<(ops node:$lhs, node:$rhs),
+ [(strict_fmul node:$lhs, node:$rhs),
+ (fmul node:$lhs, node:$rhs)]>;
+def any_fdiv : PatFrags<(ops node:$lhs, node:$rhs),
+ [(strict_fdiv node:$lhs, node:$rhs),
+ (fdiv node:$lhs, node:$rhs)]>;
+def any_frem : PatFrags<(ops node:$lhs, node:$rhs),
+ [(strict_frem node:$lhs, node:$rhs),
+ (frem node:$lhs, node:$rhs)]>;
+def any_fma : PatFrags<(ops node:$src1, node:$src2, node:$src3),
+ [(strict_fma node:$src1, node:$src2, node:$src3),
+ (fma node:$src1, node:$src2, node:$src3)]>;
+def any_fsqrt : PatFrags<(ops node:$src),
+ [(strict_fsqrt node:$src),
+ (fsqrt node:$src)]>;
+def any_fsin : PatFrags<(ops node:$src),
+ [(strict_fsin node:$src),
+ (fsin node:$src)]>;
+def any_fcos : PatFrags<(ops node:$src),
+ [(strict_fcos node:$src),
+ (fcos node:$src)]>;
+def any_fexp2 : PatFrags<(ops node:$src),
+ [(strict_fexp2 node:$src),
+ (fexp2 node:$src)]>;
+def any_fpow : PatFrags<(ops node:$lhs, node:$rhs),
+ [(strict_fpow node:$lhs, node:$rhs),
+ (fpow node:$lhs, node:$rhs)]>;
+def any_flog2 : PatFrags<(ops node:$src),
+ [(strict_flog2 node:$src),
+ (flog2 node:$src)]>;
+def any_frint : PatFrags<(ops node:$src),
+ [(strict_frint node:$src),
+ (frint node:$src)]>;
+def any_fnearbyint : PatFrags<(ops node:$src),
+ [(strict_fnearbyint node:$src),
+ (fnearbyint node:$src)]>;
+def any_fceil : PatFrags<(ops node:$src),
+ [(strict_fceil node:$src),
+ (fceil node:$src)]>;
+def any_ffloor : PatFrags<(ops node:$src),
+ [(strict_ffloor node:$src),
+ (ffloor node:$src)]>;
+def any_fround : PatFrags<(ops node:$src),
+ [(strict_fround node:$src),
+ (fround node:$src)]>;
+def any_ftrunc : PatFrags<(ops node:$src),
+ [(strict_ftrunc node:$src),
+ (ftrunc node:$src)]>;
+def any_fmaxnum : PatFrags<(ops node:$lhs, node:$rhs),
+ [(strict_fmaxnum node:$lhs, node:$rhs),
+ (fmaxnum node:$lhs, node:$rhs)]>;
+def any_fminnum : PatFrags<(ops node:$lhs, node:$rhs),
+ [(strict_fminnum node:$lhs, node:$rhs),
+ (fminnum node:$lhs, node:$rhs)]>;
+def any_fpround : PatFrags<(ops node:$src),
+ [(strict_fpround node:$src),
+ (fpround node:$src)]>;
+def any_fpextend : PatFrags<(ops node:$src),
+ [(strict_fpextend node:$src),
+ (fpextend node:$src)]>;
+
multiclass binary_atomic_op_ord<SDNode atomic_op> {
def #NAME#_monotonic : PatFrag<(ops node:$ptr, node:$val),
(!cast<SDPatternOperator>(#NAME) node:$ptr, node:$val)> {
std::next(MI.getIterator()) == IntoMI.getIterator())
return true;
- return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() &&
- empty(MI.implicit_operands());
+ return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
+ !MI.hasUnmodeledSideEffects() && empty(MI.implicit_operands());
}
} // end anonymous namespace
bool ImplicitNullChecks::canHandle(const MachineInstr *MI) {
- if (MI->isCall() || MI->hasUnmodeledSideEffects())
+ if (MI->isCall() || MI->mayRaiseFPException() ||
+ MI->hasUnmodeledSideEffects())
return false;
auto IsRegMask = [](const MachineOperand &MO) { return MO.isRegMask(); };
(void)IsRegMask;
.Case("nuw" , MIToken::kw_nuw)
.Case("nsw" , MIToken::kw_nsw)
.Case("exact" , MIToken::kw_exact)
+ .Case("fpexcept", MIToken::kw_fpexcept)
.Case("debug-location", MIToken::kw_debug_location)
.Case("same_value", MIToken::kw_cfi_same_value)
.Case("offset", MIToken::kw_cfi_offset)
kw_nuw,
kw_nsw,
kw_exact,
+ kw_fpexcept,
kw_debug_location,
kw_cfi_same_value,
kw_cfi_offset,
Token.is(MIToken::kw_reassoc) ||
Token.is(MIToken::kw_nuw) ||
Token.is(MIToken::kw_nsw) ||
- Token.is(MIToken::kw_exact)) {
+ Token.is(MIToken::kw_exact) ||
+ Token.is(MIToken::kw_fpexcept)) {
// Mine frame and fast math flags
if (Token.is(MIToken::kw_frame_setup))
Flags |= MachineInstr::FrameSetup;
Flags |= MachineInstr::NoSWrap;
if (Token.is(MIToken::kw_exact))
Flags |= MachineInstr::IsExact;
+ if (Token.is(MIToken::kw_fpexcept))
+ Flags |= MachineInstr::FPExcept;
lex();
}
OS << "nsw ";
if (MI.getFlag(MachineInstr::IsExact))
OS << "exact ";
+ if (MI.getFlag(MachineInstr::FPExcept))
+ OS << "fpexcept ";
OS << TII->getName(MI.getOpcode());
if (I < E)
// Ignore stuff that we obviously can't move.
if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
- MI->hasUnmodeledSideEffects())
+ MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects())
return false;
if (MI->mayLoad()) {
}
if (isPosition() || isDebugInstr() || isTerminator() ||
- hasUnmodeledSideEffects())
+ mayRaiseFPException() || hasUnmodeledSideEffects())
return false;
// See if this instruction does a load. If so, we have to guarantee that the
OS << "nsw ";
if (getFlag(MachineInstr::IsExact))
OS << "exact ";
+ if (getFlag(MachineInstr::FPExcept))
+ OS << "fpexcept ";
// Print the opcode name.
if (TII)
/// Return true if the instruction causes a chain between memory
/// references before and after it.
static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) {
- return MI.isCall() || MI.hasUnmodeledSideEffects() ||
+ return MI.isCall() || MI.mayRaiseFPException() ||
+ MI.hasUnmodeledSideEffects() ||
(MI.hasOrderedMemoryRef() &&
(!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
}
// Assume ordered loads and stores may have a loop carried dependence.
if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
+ SI->mayRaiseFPException() || DI->mayRaiseFPException() ||
SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
return true;
assert(Def->isBitcast() && "Invalid definition");
// Bail if there are effects that a plain copy will not expose.
- if (Def->hasUnmodeledSideEffects())
+ if (Def->mayRaiseFPException() || Def->hasUnmodeledSideEffects())
return ValueTrackerResult();
// Bitcasts with more than one def are not supported.
AAForDep = UseAA ? AA : nullptr;
BarrierChain = nullptr;
+ SUnit *FPBarrierChain = nullptr;
this->TrackLaneMasks = TrackLaneMasks;
MISUnitMap.clear();
addBarrierChain(NonAliasStores);
addBarrierChain(NonAliasLoads);
+ // Add dependency against previous FP barrier and reset FP barrier.
+ if (FPBarrierChain)
+ FPBarrierChain->addPredBarrier(BarrierChain);
+ FPBarrierChain = BarrierChain;
+
continue;
}
+    // Instructions that may raise FP exceptions must not be reordered with
+    // respect to one another, so chain them through FPBarrierChain.
+ if (MI.mayRaiseFPException()) {
+ if (FPBarrierChain)
+ FPBarrierChain->addPredBarrier(SU);
+ FPBarrierChain = SU;
+ }
+
// If it's not a store or a variant load, we're done.
if (!MI.mayStore() &&
!(MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA)))
if (Flags.hasExact())
MI->setFlag(MachineInstr::MIFlag::IsExact);
+
+ if (Flags.hasFPExcept())
+ MI->setFlag(MachineInstr::MIFlag::FPExcept);
}
// Emit all of the actual operands of this instruction, adding them to the
{ Chain, getValue(FPI.getArgOperand(0)),
getValue(FPI.getArgOperand(1)) });
+ if (FPI.getExceptionBehavior() !=
+ ConstrainedFPIntrinsic::ExceptionBehavior::ebIgnore) {
+ SDNodeFlags Flags;
+ Flags.setFPExcept(true);
+ Result->setFlags(Flags);
+ }
+
assert(Result.getNode()->getNumValues() == 2);
SDValue OutChain = Result.getValue(1);
DAG.setRoot(OutChain);
#endif
// When we are using non-default rounding modes or FP exception behavior,
- // FP operations are represented by StrictFP pseudo-operations. They
- // need to be simplified here so that the target-specific instruction
- // selectors know how to handle them.
- //
- // If the current node is a strict FP pseudo-op, the isStrictFPOp()
- // function will provide the corresponding normal FP opcode to which the
- // node should be mutated.
- //
- // FIXME: The backends need a way to handle FP constraints.
- if (Node->isStrictFPOpcode())
+ // FP operations are represented by StrictFP pseudo-operations. For
+ // targets that do not (yet) understand strict FP operations directly,
+ // we convert them to normal FP opcodes instead at this point. This
+ // will allow them to be handled by existing target-specific instruction
+ // selectors.
+ if (Node->isStrictFPOpcode() &&
+ (TLI->getOperationAction(Node->getOpcode(), Node->getValueType(0))
+ != TargetLowering::Legal))
Node = CurDAG->mutateStrictFPToFP(Node);
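For reference, a target opts out of this mutation simply by marking the strict opcodes Legal in its TargetLowering constructor, which is what the SystemZ hunks further down do. A minimal sketch, with the opcode and type chosen purely for illustration:

    // Keeping the STRICT_* node intact lets the any_* patterns in the .td
    // files select it directly instead of a mutated plain FP node.
    setOperationAction(ISD::STRICT_FADD,  MVT::f32, Legal);
    setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);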
LLVM_DEBUG(dbgs() << "\nISEL: Starting selection on root node: ";
return true;
// Avoid instructions obviously unsafe for remat.
- if (MI.isNotDuplicable() || MI.mayStore() || MI.hasUnmodeledSideEffects())
+ if (MI.isNotDuplicable() || MI.mayStore() || MI.mayRaiseFPException() ||
+ MI.hasUnmodeledSideEffects())
return false;
// Don't remat inline asm. We have no idea how expensive it is
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
}
+ // Constrained floating-point operations default to expand.
+ setOperationAction(ISD::STRICT_FADD, VT, Expand);
+ setOperationAction(ISD::STRICT_FSUB, VT, Expand);
+ setOperationAction(ISD::STRICT_FMUL, VT, Expand);
+ setOperationAction(ISD::STRICT_FDIV, VT, Expand);
+ setOperationAction(ISD::STRICT_FREM, VT, Expand);
+ setOperationAction(ISD::STRICT_FMA, VT, Expand);
+ setOperationAction(ISD::STRICT_FSQRT, VT, Expand);
+ setOperationAction(ISD::STRICT_FPOW, VT, Expand);
+ setOperationAction(ISD::STRICT_FPOWI, VT, Expand);
+ setOperationAction(ISD::STRICT_FSIN, VT, Expand);
+ setOperationAction(ISD::STRICT_FCOS, VT, Expand);
+ setOperationAction(ISD::STRICT_FEXP, VT, Expand);
+ setOperationAction(ISD::STRICT_FEXP2, VT, Expand);
+ setOperationAction(ISD::STRICT_FLOG, VT, Expand);
+ setOperationAction(ISD::STRICT_FLOG10, VT, Expand);
+ setOperationAction(ISD::STRICT_FLOG2, VT, Expand);
+ setOperationAction(ISD::STRICT_FRINT, VT, Expand);
+ setOperationAction(ISD::STRICT_FNEARBYINT, VT, Expand);
+ setOperationAction(ISD::STRICT_FCEIL, VT, Expand);
+ setOperationAction(ISD::STRICT_FFLOOR, VT, Expand);
+ setOperationAction(ISD::STRICT_FROUND, VT, Expand);
+ setOperationAction(ISD::STRICT_FTRUNC, VT, Expand);
+ setOperationAction(ISD::STRICT_FMAXNUM, VT, Expand);
+ setOperationAction(ISD::STRICT_FMINNUM, VT, Expand);
+ setOperationAction(ISD::STRICT_FP_ROUND, VT, Expand);
+ setOperationAction(ISD::STRICT_FP_EXTEND, VT, Expand);
+
// For most targets @llvm.get.dynamic.area.offset just returns 0.
setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
setOperationAction(ISD::FSINCOS, VT, Expand);
setOperationAction(ISD::FREM, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
+
+ // Handle constrained floating-point operations.
+ setOperationAction(ISD::STRICT_FADD, VT, Legal);
+ setOperationAction(ISD::STRICT_FSUB, VT, Legal);
+ setOperationAction(ISD::STRICT_FMUL, VT, Legal);
+ setOperationAction(ISD::STRICT_FDIV, VT, Legal);
+ setOperationAction(ISD::STRICT_FMA, VT, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, VT, Legal);
+ setOperationAction(ISD::STRICT_FRINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FP_ROUND, VT, Legal);
+ setOperationAction(ISD::STRICT_FP_EXTEND, VT, Legal);
+ if (Subtarget.hasFPExtension()) {
+ setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
+ setOperationAction(ISD::STRICT_FROUND, VT, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
+ }
}
}
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
+
+ // Handle constrained floating-point operations.
+ setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FMA, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FRINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, MVT::v2f64, Legal);
+ setOperationAction(ISD::STRICT_FROUND, MVT::v2f64, Legal);
}
// The vector enhancements facility 1 has instructions for these.
setOperationAction(ISD::FMAXIMUM, MVT::f128, Legal);
setOperationAction(ISD::FMINNUM, MVT::f128, Legal);
setOperationAction(ISD::FMINIMUM, MVT::f128, Legal);
+
+ // Handle constrained floating-point operations.
+ setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FMA, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FRINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FNEARBYINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FFLOOR, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FCEIL, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FROUND, MVT::v4f32, Legal);
+ setOperationAction(ISD::STRICT_FTRUNC, MVT::v4f32, Legal);
+ for (auto VT : { MVT::f32, MVT::f64, MVT::f128,
+ MVT::v4f32, MVT::v2f64 }) {
+ setOperationAction(ISD::STRICT_FMAXNUM, VT, Legal);
+ setOperationAction(ISD::STRICT_FMINNUM, VT, Legal);
+ }
}
// We have fused multiply-addition for f32 and f64 but not f128.
// Moves between two floating-point registers that also set the condition
// codes.
-let Uses = [FPC], Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
+let Uses = [FPC], mayRaiseFPException = 1,
+ Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
defm LTEBR : LoadAndTestRRE<"ltebr", 0xB302, FP32>;
defm LTDBR : LoadAndTestRRE<"ltdbr", 0xB312, FP64>;
defm LTXBR : LoadAndTestRRE<"ltxbr", 0xB342, FP128>;
// Use a normal load-and-test for compare against zero in case of
// vector support (via a pseudo to simplify instruction selection).
-let Uses = [FPC], Defs = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
+let Uses = [FPC], mayRaiseFPException = 1,
+ Defs = [CC], usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
def LTEBRCompare_VecPseudo : Pseudo<(outs), (ins FP32:$R1, FP32:$R2), []>;
def LTDBRCompare_VecPseudo : Pseudo<(outs), (ins FP64:$R1, FP64:$R2), []>;
def LTXBRCompare_VecPseudo : Pseudo<(outs), (ins FP128:$R1, FP128:$R2), []>;
// Convert floating-point values to narrower representations, rounding
// according to the current mode. The destination of LEXBR and LDXBR
// is a 128-bit value, but only the first register of the pair is used.
-let Uses = [FPC] in {
- def LEDBR : UnaryRRE<"ledbr", 0xB344, fpround, FP32, FP64>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+ def LEDBR : UnaryRRE<"ledbr", 0xB344, any_fpround, FP32, FP64>;
def LEXBR : UnaryRRE<"lexbr", 0xB346, null_frag, FP128, FP128>;
def LDXBR : UnaryRRE<"ldxbr", 0xB345, null_frag, FP128, FP128>;
}
let Predicates = [FeatureNoVectorEnhancements1] in {
- def : Pat<(f32 (fpround FP128:$src)),
+ def : Pat<(f32 (any_fpround FP128:$src)),
(EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hh32)>;
- def : Pat<(f64 (fpround FP128:$src)),
+ def : Pat<(f64 (any_fpround FP128:$src)),
(EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>;
}
// Extend register floating-point values to wider representations.
-let Uses = [FPC] in {
- def LDEBR : UnaryRRE<"ldebr", 0xB304, fpextend, FP64, FP32>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+ def LDEBR : UnaryRRE<"ldebr", 0xB304, any_fpextend, FP64, FP32>;
def LXEBR : UnaryRRE<"lxebr", 0xB306, null_frag, FP128, FP32>;
def LXDBR : UnaryRRE<"lxdbr", 0xB305, null_frag, FP128, FP64>;
}
let Predicates = [FeatureNoVectorEnhancements1] in {
- def : Pat<(f128 (fpextend (f32 FP32:$src))), (LXEBR FP32:$src)>;
- def : Pat<(f128 (fpextend (f64 FP64:$src))), (LXDBR FP64:$src)>;
+ def : Pat<(f128 (any_fpextend (f32 FP32:$src))), (LXEBR FP32:$src)>;
+ def : Pat<(f128 (any_fpextend (f64 FP64:$src))), (LXDBR FP64:$src)>;
}
// Extend memory floating-point values to wider representations.
-let Uses = [FPC] in {
+let Uses = [FPC], mayRaiseFPException = 1 in {
def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64, 4>;
def LXEB : UnaryRXE<"lxeb", 0xED06, null_frag, FP128, 4>;
def LXDB : UnaryRXE<"lxdb", 0xED05, null_frag, FP128, 8>;
}
// Convert a signed integer register value to a floating-point one.
-let Uses = [FPC] in {
+let Uses = [FPC], mayRaiseFPException = 1 in {
def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>;
def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64, GR32>;
def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>;
// The FP extension feature provides versions of the above that allow
// specifying rounding mode and inexact-exception suppression flags.
-let Uses = [FPC], Predicates = [FeatureFPExtension] in {
+let Uses = [FPC], mayRaiseFPException = 1, Predicates = [FeatureFPExtension] in {
def CEFBRA : TernaryRRFe<"cefbra", 0xB394, FP32, GR32>;
def CDFBRA : TernaryRRFe<"cdfbra", 0xB395, FP64, GR32>;
def CXFBRA : TernaryRRFe<"cxfbra", 0xB396, FP128, GR32>;
// Convert an unsigned integer register value to a floating-point one.
let Predicates = [FeatureFPExtension] in {
- let Uses = [FPC] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
def CELFBR : TernaryRRFe<"celfbr", 0xB390, FP32, GR32>;
def CDLFBR : TernaryRRFe<"cdlfbr", 0xB391, FP64, GR32>;
def CXLFBR : TernaryRRFe<"cxlfbr", 0xB392, FP128, GR32>;
// Convert a floating-point register value to a signed integer value,
// with the second operand (modifier M3) specifying the rounding mode.
-let Uses = [FPC], Defs = [CC] in {
+let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
def CFEBR : BinaryRRFe<"cfebr", 0xB398, GR32, FP32>;
def CFDBR : BinaryRRFe<"cfdbr", 0xB399, GR32, FP64>;
def CFXBR : BinaryRRFe<"cfxbr", 0xB39A, GR32, FP128>;
// The FP extension feature provides versions of the above that allow
// also specifying the inexact-exception suppression flag.
-let Uses = [FPC], Predicates = [FeatureFPExtension], Defs = [CC] in {
+let Uses = [FPC], mayRaiseFPException = 1,
+ Predicates = [FeatureFPExtension], Defs = [CC] in {
def CFEBRA : TernaryRRFe<"cfebra", 0xB398, GR32, FP32>;
def CFDBRA : TernaryRRFe<"cfdbra", 0xB399, GR32, FP64>;
def CFXBRA : TernaryRRFe<"cfxbra", 0xB39A, GR32, FP128>;
// Convert a floating-point register value to an unsigned integer value.
let Predicates = [FeatureFPExtension] in {
- let Uses = [FPC], Defs = [CC] in {
+ let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
def CLFEBR : TernaryRRFe<"clfebr", 0xB39C, GR32, FP32>;
def CLFDBR : TernaryRRFe<"clfdbr", 0xB39D, GR32, FP64>;
def CLFXBR : TernaryRRFe<"clfxbr", 0xB39E, GR32, FP128>;
def LNDFR_32 : UnaryRRE<"lndfr", 0xB371, fnabs, FP32, FP32>;
// Square root.
-let Uses = [FPC] in {
- def SQEBR : UnaryRRE<"sqebr", 0xB314, fsqrt, FP32, FP32>;
- def SQDBR : UnaryRRE<"sqdbr", 0xB315, fsqrt, FP64, FP64>;
- def SQXBR : UnaryRRE<"sqxbr", 0xB316, fsqrt, FP128, FP128>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+ def SQEBR : UnaryRRE<"sqebr", 0xB314, any_fsqrt, FP32, FP32>;
+ def SQDBR : UnaryRRE<"sqdbr", 0xB315, any_fsqrt, FP64, FP64>;
+ def SQXBR : UnaryRRE<"sqxbr", 0xB316, any_fsqrt, FP128, FP128>;
- def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<fsqrt>, FP32, 4>;
- def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<fsqrt>, FP64, 8>;
+ def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<any_fsqrt>, FP32, 4>;
+ def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<any_fsqrt>, FP64, 8>;
}
// Round to an integer, with the second operand (modifier M3) specifying
// the rounding mode. These forms always check for inexact conditions.
-let Uses = [FPC] in {
+let Uses = [FPC], mayRaiseFPException = 1 in {
def FIEBR : BinaryRRFe<"fiebr", 0xB357, FP32, FP32>;
def FIDBR : BinaryRRFe<"fidbr", 0xB35F, FP64, FP64>;
def FIXBR : BinaryRRFe<"fixbr", 0xB347, FP128, FP128>;
// frint rounds according to the current mode (modifier 0) and detects
// inexact conditions.
-def : Pat<(frint FP32:$src), (FIEBR 0, FP32:$src)>;
-def : Pat<(frint FP64:$src), (FIDBR 0, FP64:$src)>;
-def : Pat<(frint FP128:$src), (FIXBR 0, FP128:$src)>;
+def : Pat<(any_frint FP32:$src), (FIEBR 0, FP32:$src)>;
+def : Pat<(any_frint FP64:$src), (FIDBR 0, FP64:$src)>;
+def : Pat<(any_frint FP128:$src), (FIXBR 0, FP128:$src)>;
let Predicates = [FeatureFPExtension] in {
// Extended forms of the FIxBR instructions. M4 can be set to 4
// to suppress detection of inexact conditions.
- let Uses = [FPC] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
def FIEBRA : TernaryRRFe<"fiebra", 0xB357, FP32, FP32>;
def FIDBRA : TernaryRRFe<"fidbra", 0xB35F, FP64, FP64>;
def FIXBRA : TernaryRRFe<"fixbra", 0xB347, FP128, FP128>;
}
// fnearbyint is like frint but does not detect inexact conditions.
- def : Pat<(fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>;
- def : Pat<(fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>;
- def : Pat<(fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>;
+ def : Pat<(any_fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>;
+ def : Pat<(any_fnearbyint FP64:$src), (FIDBRA 0, FP64:$src, 4)>;
+ def : Pat<(any_fnearbyint FP128:$src), (FIXBRA 0, FP128:$src, 4)>;
// floor is no longer allowed to raise an inexact condition,
// so restrict it to the cases where the condition can be suppressed.
// Mode 7 is round towards -inf.
- def : Pat<(ffloor FP32:$src), (FIEBRA 7, FP32:$src, 4)>;
- def : Pat<(ffloor FP64:$src), (FIDBRA 7, FP64:$src, 4)>;
- def : Pat<(ffloor FP128:$src), (FIXBRA 7, FP128:$src, 4)>;
+ def : Pat<(any_ffloor FP32:$src), (FIEBRA 7, FP32:$src, 4)>;
+ def : Pat<(any_ffloor FP64:$src), (FIDBRA 7, FP64:$src, 4)>;
+ def : Pat<(any_ffloor FP128:$src), (FIXBRA 7, FP128:$src, 4)>;
// Same idea for ceil, where mode 6 is round towards +inf.
- def : Pat<(fceil FP32:$src), (FIEBRA 6, FP32:$src, 4)>;
- def : Pat<(fceil FP64:$src), (FIDBRA 6, FP64:$src, 4)>;
- def : Pat<(fceil FP128:$src), (FIXBRA 6, FP128:$src, 4)>;
+ def : Pat<(any_fceil FP32:$src), (FIEBRA 6, FP32:$src, 4)>;
+ def : Pat<(any_fceil FP64:$src), (FIDBRA 6, FP64:$src, 4)>;
+ def : Pat<(any_fceil FP128:$src), (FIXBRA 6, FP128:$src, 4)>;
// Same idea for trunc, where mode 5 is round towards zero.
- def : Pat<(ftrunc FP32:$src), (FIEBRA 5, FP32:$src, 4)>;
- def : Pat<(ftrunc FP64:$src), (FIDBRA 5, FP64:$src, 4)>;
- def : Pat<(ftrunc FP128:$src), (FIXBRA 5, FP128:$src, 4)>;
+ def : Pat<(any_ftrunc FP32:$src), (FIEBRA 5, FP32:$src, 4)>;
+ def : Pat<(any_ftrunc FP64:$src), (FIDBRA 5, FP64:$src, 4)>;
+ def : Pat<(any_ftrunc FP128:$src), (FIXBRA 5, FP128:$src, 4)>;
// Same idea for round, where mode 1 is round towards nearest with
// ties away from zero.
- def : Pat<(fround FP32:$src), (FIEBRA 1, FP32:$src, 4)>;
- def : Pat<(fround FP64:$src), (FIDBRA 1, FP64:$src, 4)>;
- def : Pat<(fround FP128:$src), (FIXBRA 1, FP128:$src, 4)>;
+ def : Pat<(any_fround FP32:$src), (FIEBRA 1, FP32:$src, 4)>;
+ def : Pat<(any_fround FP64:$src), (FIDBRA 1, FP64:$src, 4)>;
+ def : Pat<(any_fround FP128:$src), (FIXBRA 1, FP128:$src, 4)>;
}
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Addition.
-let Uses = [FPC], Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
+let Uses = [FPC], mayRaiseFPException = 1,
+ Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
let isCommutable = 1 in {
- def AEBR : BinaryRRE<"aebr", 0xB30A, fadd, FP32, FP32>;
- def ADBR : BinaryRRE<"adbr", 0xB31A, fadd, FP64, FP64>;
- def AXBR : BinaryRRE<"axbr", 0xB34A, fadd, FP128, FP128>;
+ def AEBR : BinaryRRE<"aebr", 0xB30A, any_fadd, FP32, FP32>;
+ def ADBR : BinaryRRE<"adbr", 0xB31A, any_fadd, FP64, FP64>;
+ def AXBR : BinaryRRE<"axbr", 0xB34A, any_fadd, FP128, FP128>;
}
- def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load, 4>;
- def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load, 8>;
+ def AEB : BinaryRXE<"aeb", 0xED0A, any_fadd, FP32, load, 4>;
+ def ADB : BinaryRXE<"adb", 0xED1A, any_fadd, FP64, load, 8>;
}
// Subtraction.
-let Uses = [FPC], Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
- def SEBR : BinaryRRE<"sebr", 0xB30B, fsub, FP32, FP32>;
- def SDBR : BinaryRRE<"sdbr", 0xB31B, fsub, FP64, FP64>;
- def SXBR : BinaryRRE<"sxbr", 0xB34B, fsub, FP128, FP128>;
+let Uses = [FPC], mayRaiseFPException = 1,
+ Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
+ def SEBR : BinaryRRE<"sebr", 0xB30B, any_fsub, FP32, FP32>;
+ def SDBR : BinaryRRE<"sdbr", 0xB31B, any_fsub, FP64, FP64>;
+ def SXBR : BinaryRRE<"sxbr", 0xB34B, any_fsub, FP128, FP128>;
- def SEB : BinaryRXE<"seb", 0xED0B, fsub, FP32, load, 4>;
- def SDB : BinaryRXE<"sdb", 0xED1B, fsub, FP64, load, 8>;
+ def SEB : BinaryRXE<"seb", 0xED0B, any_fsub, FP32, load, 4>;
+ def SDB : BinaryRXE<"sdb", 0xED1B, any_fsub, FP64, load, 8>;
}
// Multiplication.
-let Uses = [FPC] in {
+let Uses = [FPC], mayRaiseFPException = 1 in {
let isCommutable = 1 in {
- def MEEBR : BinaryRRE<"meebr", 0xB317, fmul, FP32, FP32>;
- def MDBR : BinaryRRE<"mdbr", 0xB31C, fmul, FP64, FP64>;
- def MXBR : BinaryRRE<"mxbr", 0xB34C, fmul, FP128, FP128>;
+ def MEEBR : BinaryRRE<"meebr", 0xB317, any_fmul, FP32, FP32>;
+ def MDBR : BinaryRRE<"mdbr", 0xB31C, any_fmul, FP64, FP64>;
+ def MXBR : BinaryRRE<"mxbr", 0xB34C, any_fmul, FP128, FP128>;
}
- def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load, 4>;
- def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load, 8>;
+ def MEEB : BinaryRXE<"meeb", 0xED17, any_fmul, FP32, load, 4>;
+ def MDB : BinaryRXE<"mdb", 0xED1C, any_fmul, FP64, load, 8>;
}
// f64 multiplication of two FP32 registers.
-let Uses = [FPC] in
+let Uses = [FPC], mayRaiseFPException = 1 in
def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>;
-def : Pat<(fmul (f64 (fpextend FP32:$src1)), (f64 (fpextend FP32:$src2))),
+def : Pat<(any_fmul (f64 (fpextend FP32:$src1)),
+ (f64 (fpextend FP32:$src2))),
(MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
FP32:$src1, subreg_h32), FP32:$src2)>;
// f64 multiplication of an FP32 register and an f32 memory.
-let Uses = [FPC] in
+let Uses = [FPC], mayRaiseFPException = 1 in
def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
-def : Pat<(fmul (f64 (fpextend FP32:$src1)),
- (f64 (extloadf32 bdxaddr12only:$addr))),
+def : Pat<(any_fmul (f64 (fpextend FP32:$src1)),
+ (f64 (extloadf32 bdxaddr12only:$addr))),
(MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32),
bdxaddr12only:$addr)>;
// f128 multiplication of two FP64 registers.
-let Uses = [FPC] in
+let Uses = [FPC], mayRaiseFPException = 1 in
def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>;
let Predicates = [FeatureNoVectorEnhancements1] in
- def : Pat<(fmul (f128 (fpextend FP64:$src1)), (f128 (fpextend FP64:$src2))),
+ def : Pat<(any_fmul (f128 (fpextend FP64:$src1)),
+ (f128 (fpextend FP64:$src2))),
(MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)),
FP64:$src1, subreg_h64), FP64:$src2)>;
// f128 multiplication of an FP64 register and an f64 memory.
-let Uses = [FPC] in
+let Uses = [FPC], mayRaiseFPException = 1 in
def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>;
let Predicates = [FeatureNoVectorEnhancements1] in
- def : Pat<(fmul (f128 (fpextend FP64:$src1)),
- (f128 (extloadf64 bdxaddr12only:$addr))),
+ def : Pat<(any_fmul (f128 (fpextend FP64:$src1)),
+ (f128 (extloadf64 bdxaddr12only:$addr))),
(MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64),
bdxaddr12only:$addr)>;
// Fused multiply-add.
-let Uses = [FPC] in {
- def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32, FP32>;
- def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64, FP64>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+ def MAEBR : TernaryRRD<"maebr", 0xB30E, z_any_fma, FP32, FP32>;
+ def MADBR : TernaryRRD<"madbr", 0xB31E, z_any_fma, FP64, FP64>;
- def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, FP32, load, 4>;
- def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, FP64, load, 8>;
+ def MAEB : TernaryRXF<"maeb", 0xED0E, z_any_fma, FP32, FP32, load, 4>;
+ def MADB : TernaryRXF<"madb", 0xED1E, z_any_fma, FP64, FP64, load, 8>;
}
// Fused multiply-subtract.
-let Uses = [FPC] in {
- def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32, FP32>;
- def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64, FP64>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+ def MSEBR : TernaryRRD<"msebr", 0xB30F, z_any_fms, FP32, FP32>;
+ def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_any_fms, FP64, FP64>;
- def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, FP32, load, 4>;
- def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, FP64, load, 8>;
+ def MSEB : TernaryRXF<"mseb", 0xED0F, z_any_fms, FP32, FP32, load, 4>;
+ def MSDB : TernaryRXF<"msdb", 0xED1F, z_any_fms, FP64, FP64, load, 8>;
}
// Division.
-let Uses = [FPC] in {
- def DEBR : BinaryRRE<"debr", 0xB30D, fdiv, FP32, FP32>;
- def DDBR : BinaryRRE<"ddbr", 0xB31D, fdiv, FP64, FP64>;
- def DXBR : BinaryRRE<"dxbr", 0xB34D, fdiv, FP128, FP128>;
+let Uses = [FPC], mayRaiseFPException = 1 in {
+ def DEBR : BinaryRRE<"debr", 0xB30D, any_fdiv, FP32, FP32>;
+ def DDBR : BinaryRRE<"ddbr", 0xB31D, any_fdiv, FP64, FP64>;
+ def DXBR : BinaryRRE<"dxbr", 0xB34D, any_fdiv, FP128, FP128>;
- def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load, 4>;
- def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load, 8>;
+ def DEB : BinaryRXE<"deb", 0xED0D, any_fdiv, FP32, load, 4>;
+ def DDB : BinaryRXE<"ddb", 0xED1D, any_fdiv, FP64, load, 8>;
}
// Divide to integer.
-let Uses = [FPC], Defs = [CC] in {
+let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
def DIEBR : TernaryRRFb<"diebr", 0xB353, FP32, FP32, FP32>;
def DIDBR : TernaryRRFb<"didbr", 0xB35B, FP64, FP64, FP64>;
}
// Comparisons
//===----------------------------------------------------------------------===//
-let Uses = [FPC], Defs = [CC], CCValues = 0xF in {
+let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC], CCValues = 0xF in {
def CEBR : CompareRRE<"cebr", 0xB309, z_fcmp, FP32, FP32>;
def CDBR : CompareRRE<"cdbr", 0xB319, z_fcmp, FP64, FP64>;
def CXBR : CompareRRE<"cxbr", 0xB349, z_fcmp, FP128, FP128>;
}
}
- let Defs = [FPC] in {
+ let Defs = [FPC], mayRaiseFPException = 1 in {
def SFASR : SideEffectUnaryRRE<"sfasr", 0xB385, GR32, null_frag>;
def LFAS : SideEffectUnaryS<"lfas", 0xB2BD, null_frag, 4>;
}
// See comments in SystemZInstrFP.td for the suppression flags and
// rounding modes.
multiclass VectorRounding<Instruction insn, TypedReg tr> {
- def : FPConversion<insn, frint, tr, tr, 0, 0>;
- def : FPConversion<insn, fnearbyint, tr, tr, 4, 0>;
- def : FPConversion<insn, ffloor, tr, tr, 4, 7>;
- def : FPConversion<insn, fceil, tr, tr, 4, 6>;
- def : FPConversion<insn, ftrunc, tr, tr, 4, 5>;
- def : FPConversion<insn, fround, tr, tr, 4, 1>;
+ def : FPConversion<insn, any_frint, tr, tr, 0, 0>;
+ def : FPConversion<insn, any_fnearbyint, tr, tr, 4, 0>;
+ def : FPConversion<insn, any_ffloor, tr, tr, 4, 7>;
+ def : FPConversion<insn, any_fceil, tr, tr, 4, 6>;
+ def : FPConversion<insn, any_ftrunc, tr, tr, 4, 5>;
+ def : FPConversion<insn, any_fround, tr, tr, 4, 1>;
}
let Predicates = [FeatureVector] in {
// Add.
- let Uses = [FPC] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
def VFA : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>;
- def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>;
- def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>;
+ def VFADB : BinaryVRRc<"vfadb", 0xE7E3, any_fadd, v128db, v128db, 3, 0>;
+ def WFADB : BinaryVRRc<"wfadb", 0xE7E3, any_fadd, v64db, v64db, 3, 8>;
let Predicates = [FeatureVectorEnhancements1] in {
- def VFASB : BinaryVRRc<"vfasb", 0xE7E3, fadd, v128sb, v128sb, 2, 0>;
- def WFASB : BinaryVRRc<"wfasb", 0xE7E3, fadd, v32sb, v32sb, 2, 8>;
- def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, fadd, v128xb, v128xb, 4, 8>;
+ def VFASB : BinaryVRRc<"vfasb", 0xE7E3, any_fadd, v128sb, v128sb, 2, 0>;
+ def WFASB : BinaryVRRc<"wfasb", 0xE7E3, any_fadd, v32sb, v32sb, 2, 8>;
+ def WFAXB : BinaryVRRc<"wfaxb", 0xE7E3, any_fadd, v128xb, v128xb, 4, 8>;
}
}
// Convert from fixed 64-bit.
- let Uses = [FPC] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
def VCDG : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>;
def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;
def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>;
def : FPConversion<VCDGB, sint_to_fp, v128db, v128g, 0, 0>;
// Convert from logical 64-bit.
- let Uses = [FPC] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
def VCDLG : TernaryVRRaFloatGeneric<"vcdlg", 0xE7C1>;
def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>;
def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>;
def : FPConversion<VCDLGB, uint_to_fp, v128db, v128g, 0, 0>;
// Convert to fixed 64-bit.
- let Uses = [FPC] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
def VCGD : TernaryVRRaFloatGeneric<"vcgd", 0xE7C2>;
def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>;
def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>;
def : FPConversion<VCGDB, fp_to_sint, v128g, v128db, 0, 5>;
// Convert to logical 64-bit.
- let Uses = [FPC] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
def VCLGD : TernaryVRRaFloatGeneric<"vclgd", 0xE7C0>;
def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>;
def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>;
def : FPConversion<VCLGDB, fp_to_uint, v128g, v128db, 0, 5>;
// Divide.
- let Uses = [FPC] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
def VFD : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>;
- def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>;
- def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>;
+ def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, any_fdiv, v128db, v128db, 3, 0>;
+ def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, any_fdiv, v64db, v64db, 3, 8>;
let Predicates = [FeatureVectorEnhancements1] in {
- def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, fdiv, v128sb, v128sb, 2, 0>;
- def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, fdiv, v32sb, v32sb, 2, 8>;
- def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, fdiv, v128xb, v128xb, 4, 8>;
+ def VFDSB : BinaryVRRc<"vfdsb", 0xE7E5, any_fdiv, v128sb, v128sb, 2, 0>;
+ def WFDSB : BinaryVRRc<"wfdsb", 0xE7E5, any_fdiv, v32sb, v32sb, 2, 8>;
+ def WFDXB : BinaryVRRc<"wfdxb", 0xE7E5, any_fdiv, v128xb, v128xb, 4, 8>;
}
}
// Load FP integer.
- let Uses = [FPC] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
def VFI : TernaryVRRaFloatGeneric<"vfi", 0xE7C7>;
def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>;
def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
defm : VectorRounding<VFIDB, v128db>;
defm : VectorRounding<WFIDB, v64db>;
let Predicates = [FeatureVectorEnhancements1] in {
- let Uses = [FPC] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
def VFISB : TernaryVRRa<"vfisb", 0xE7C7, int_s390_vfisb, v128sb, v128sb, 2, 0>;
def WFISB : TernaryVRRa<"wfisb", 0xE7C7, null_frag, v32sb, v32sb, 2, 8>;
def WFIXB : TernaryVRRa<"wfixb", 0xE7C7, null_frag, v128xb, v128xb, 4, 8>;
}
// Load lengthened.
- let Uses = [FPC] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>;
def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128sb, 2, 0>;
- def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fpextend, v64db, v32sb, 2, 8>;
+ def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, any_fpextend, v64db, v32sb, 2, 8>;
}
let Predicates = [FeatureVectorEnhancements1] in {
- let Uses = [FPC] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
let isAsmParserOnly = 1 in {
def VFLL : UnaryVRRaFloatGeneric<"vfll", 0xE7C4>;
def VFLLS : UnaryVRRa<"vflls", 0xE7C4, null_frag, v128db, v128sb, 2, 0>;
def WFLLS : UnaryVRRa<"wflls", 0xE7C4, null_frag, v64db, v32sb, 2, 8>;
}
- def WFLLD : UnaryVRRa<"wflld", 0xE7C4, fpextend, v128xb, v64db, 3, 8>;
+ def WFLLD : UnaryVRRa<"wflld", 0xE7C4, any_fpextend, v128xb, v64db, 3, 8>;
}
- def : Pat<(f128 (fpextend (f32 VR32:$src))),
+ def : Pat<(f128 (any_fpextend (f32 VR32:$src))),
(WFLLD (WLDEB VR32:$src))>;
}
// Load rounded.
- let Uses = [FPC] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
def VLED : TernaryVRRaFloatGeneric<"vled", 0xE7C5>;
def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32sb, v64db, 3, 8>;
}
def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
- def : FPConversion<WLEDB, fpround, v32sb, v64db, 0, 0>;
+ def : FPConversion<WLEDB, any_fpround, v32sb, v64db, 0, 0>;
let Predicates = [FeatureVectorEnhancements1] in {
- let Uses = [FPC] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
let isAsmParserOnly = 1 in {
def VFLR : TernaryVRRaFloatGeneric<"vflr", 0xE7C5>;
def VFLRD : TernaryVRRa<"vflrd", 0xE7C5, null_frag, v128sb, v128db, 3, 0>;
}
def WFLRX : TernaryVRRa<"wflrx", 0xE7C5, null_frag, v64db, v128xb, 4, 8>;
}
- def : FPConversion<WFLRX, fpround, v64db, v128xb, 0, 0>;
- def : Pat<(f32 (fpround (f128 VR128:$src))),
+ def : FPConversion<WFLRX, any_fpround, v64db, v128xb, 0, 0>;
+ def : Pat<(f32 (any_fpround (f128 VR128:$src))),
(WLEDB (WFLRX VR128:$src, 0, 3), 0, 0)>;
}
// Maximum.
multiclass VectorMax<Instruction insn, TypedReg tr> {
- def : FPMinMax<insn, fmaxnum, tr, 4>;
+ def : FPMinMax<insn, any_fmaxnum, tr, 4>;
def : FPMinMax<insn, fmaximum, tr, 1>;
}
let Predicates = [FeatureVectorEnhancements1] in {
- let Uses = [FPC] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
def VFMAX : TernaryVRRcFloatGeneric<"vfmax", 0xE7EF>;
def VFMAXDB : TernaryVRRcFloat<"vfmaxdb", 0xE7EF, int_s390_vfmaxdb,
v128db, v128db, 3, 0>;
// Minimum.
multiclass VectorMin<Instruction insn, TypedReg tr> {
- def : FPMinMax<insn, fminnum, tr, 4>;
+ def : FPMinMax<insn, any_fminnum, tr, 4>;
def : FPMinMax<insn, fminimum, tr, 1>;
}
let Predicates = [FeatureVectorEnhancements1] in {
- let Uses = [FPC] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
def VFMIN : TernaryVRRcFloatGeneric<"vfmin", 0xE7EE>;
def VFMINDB : TernaryVRRcFloat<"vfmindb", 0xE7EE, int_s390_vfmindb,
v128db, v128db, 3, 0>;
}
// Multiply.
- let Uses = [FPC] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
def VFM : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>;
- def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>;
- def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>;
+ def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, any_fmul, v128db, v128db, 3, 0>;
+ def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, any_fmul, v64db, v64db, 3, 8>;
let Predicates = [FeatureVectorEnhancements1] in {
- def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, fmul, v128sb, v128sb, 2, 0>;
- def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, fmul, v32sb, v32sb, 2, 8>;
- def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, fmul, v128xb, v128xb, 4, 8>;
+ def VFMSB : BinaryVRRc<"vfmsb", 0xE7E7, any_fmul, v128sb, v128sb, 2, 0>;
+ def WFMSB : BinaryVRRc<"wfmsb", 0xE7E7, any_fmul, v32sb, v32sb, 2, 8>;
+ def WFMXB : BinaryVRRc<"wfmxb", 0xE7E7, any_fmul, v128xb, v128xb, 4, 8>;
}
}
// Multiply and add.
- let Uses = [FPC] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
def VFMA : TernaryVRReFloatGeneric<"vfma", 0xE78F>;
- def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>;
- def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>;
+ def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, any_fma, v128db, v128db, 0, 3>;
+ def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, any_fma, v64db, v64db, 8, 3>;
let Predicates = [FeatureVectorEnhancements1] in {
- def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, fma, v128sb, v128sb, 0, 2>;
- def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, fma, v32sb, v32sb, 8, 2>;
- def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, fma, v128xb, v128xb, 8, 4>;
+ def VFMASB : TernaryVRRe<"vfmasb", 0xE78F, any_fma, v128sb, v128sb, 0, 2>;
+ def WFMASB : TernaryVRRe<"wfmasb", 0xE78F, any_fma, v32sb, v32sb, 8, 2>;
+ def WFMAXB : TernaryVRRe<"wfmaxb", 0xE78F, any_fma, v128xb, v128xb, 8, 4>;
}
}
// Multiply and subtract.
- let Uses = [FPC] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
def VFMS : TernaryVRReFloatGeneric<"vfms", 0xE78E>;
- def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>;
- def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>;
+ def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, any_fms, v128db, v128db, 0, 3>;
+ def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, any_fms, v64db, v64db, 8, 3>;
let Predicates = [FeatureVectorEnhancements1] in {
- def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, fms, v128sb, v128sb, 0, 2>;
- def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, fms, v32sb, v32sb, 8, 2>;
- def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, fms, v128xb, v128xb, 8, 4>;
+ def VFMSSB : TernaryVRRe<"vfmssb", 0xE78E, any_fms, v128sb, v128sb, 0, 2>;
+ def WFMSSB : TernaryVRRe<"wfmssb", 0xE78E, any_fms, v32sb, v32sb, 8, 2>;
+ def WFMSXB : TernaryVRRe<"wfmsxb", 0xE78E, any_fms, v128xb, v128xb, 8, 4>;
}
}
// Negative multiply and add.
- let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in {
+ let Uses = [FPC], mayRaiseFPException = 1,
+ Predicates = [FeatureVectorEnhancements1] in {
def VFNMA : TernaryVRReFloatGeneric<"vfnma", 0xE79F>;
- def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, fnma, v128db, v128db, 0, 3>;
- def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, fnma, v64db, v64db, 8, 3>;
- def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, fnma, v128sb, v128sb, 0, 2>;
- def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, fnma, v32sb, v32sb, 8, 2>;
- def WFNMAXB : TernaryVRRe<"wfnmaxb", 0xE79F, fnma, v128xb, v128xb, 8, 4>;
+ def VFNMADB : TernaryVRRe<"vfnmadb", 0xE79F, any_fnma, v128db, v128db, 0, 3>;
+ def WFNMADB : TernaryVRRe<"wfnmadb", 0xE79F, any_fnma, v64db, v64db, 8, 3>;
+ def VFNMASB : TernaryVRRe<"vfnmasb", 0xE79F, any_fnma, v128sb, v128sb, 0, 2>;
+ def WFNMASB : TernaryVRRe<"wfnmasb", 0xE79F, any_fnma, v32sb, v32sb, 8, 2>;
+ def WFNMAXB : TernaryVRRe<"wfnmaxb", 0xE79F, any_fnma, v128xb, v128xb, 8, 4>;
}
// Negative multiply and subtract.
- let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in {
+ let Uses = [FPC], mayRaiseFPException = 1,
+ Predicates = [FeatureVectorEnhancements1] in {
def VFNMS : TernaryVRReFloatGeneric<"vfnms", 0xE79E>;
- def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, fnms, v128db, v128db, 0, 3>;
- def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, fnms, v64db, v64db, 8, 3>;
- def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, fnms, v128sb, v128sb, 0, 2>;
- def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, fnms, v32sb, v32sb, 8, 2>;
- def WFNMSXB : TernaryVRRe<"wfnmsxb", 0xE79E, fnms, v128xb, v128xb, 8, 4>;
+ def VFNMSDB : TernaryVRRe<"vfnmsdb", 0xE79E, any_fnms, v128db, v128db, 0, 3>;
+ def WFNMSDB : TernaryVRRe<"wfnmsdb", 0xE79E, any_fnms, v64db, v64db, 8, 3>;
+ def VFNMSSB : TernaryVRRe<"vfnmssb", 0xE79E, any_fnms, v128sb, v128sb, 0, 2>;
+ def WFNMSSB : TernaryVRRe<"wfnmssb", 0xE79E, any_fnms, v32sb, v32sb, 8, 2>;
+ def WFNMSXB : TernaryVRRe<"wfnmsxb", 0xE79E, any_fnms, v128xb, v128xb, 8, 4>;
}
// Perform sign operation.
}
// Square root.
- let Uses = [FPC] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
def VFSQ : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>;
- def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>;
- def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>;
+ def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, any_fsqrt, v128db, v128db, 3, 0>;
+ def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, any_fsqrt, v64db, v64db, 3, 8>;
let Predicates = [FeatureVectorEnhancements1] in {
- def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, fsqrt, v128sb, v128sb, 2, 0>;
- def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, fsqrt, v32sb, v32sb, 2, 8>;
- def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, fsqrt, v128xb, v128xb, 4, 8>;
+ def VFSQSB : UnaryVRRa<"vfsqsb", 0xE7CE, any_fsqrt, v128sb, v128sb, 2, 0>;
+ def WFSQSB : UnaryVRRa<"wfsqsb", 0xE7CE, any_fsqrt, v32sb, v32sb, 2, 8>;
+ def WFSQXB : UnaryVRRa<"wfsqxb", 0xE7CE, any_fsqrt, v128xb, v128xb, 4, 8>;
}
}
// Subtract.
- let Uses = [FPC] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
def VFS : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>;
- def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>;
- def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>;
+ def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, any_fsub, v128db, v128db, 3, 0>;
+ def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, any_fsub, v64db, v64db, 3, 8>;
let Predicates = [FeatureVectorEnhancements1] in {
- def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, fsub, v128sb, v128sb, 2, 0>;
- def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, fsub, v32sb, v32sb, 2, 8>;
- def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, fsub, v128xb, v128xb, 4, 8>;
+ def VFSSB : BinaryVRRc<"vfssb", 0xE7E2, any_fsub, v128sb, v128sb, 2, 0>;
+ def WFSSB : BinaryVRRc<"wfssb", 0xE7E2, any_fsub, v32sb, v32sb, 2, 8>;
+ def WFSXB : BinaryVRRc<"wfsxb", 0xE7E2, any_fsub, v128xb, v128xb, 4, 8>;
}
}
let Predicates = [FeatureVector] in {
// Compare scalar.
- let Uses = [FPC], Defs = [CC] in {
+ let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
def WFC : CompareVRRaFloatGeneric<"wfc", 0xE7CB>;
def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>;
let Predicates = [FeatureVectorEnhancements1] in {
}
// Compare and signal scalar.
- let Uses = [FPC], Defs = [CC] in {
+ let Uses = [FPC], mayRaiseFPException = 1, Defs = [CC] in {
def WFK : CompareVRRaFloatGeneric<"wfk", 0xE7CA>;
def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>;
let Predicates = [FeatureVectorEnhancements1] in {
}
// Compare equal.
- let Uses = [FPC] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
def VFCE : BinaryVRRcSPairFloatGeneric<"vfce", 0xE7E8>;
defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes,
v128g, v128db, 3, 0>;
}
// Compare and signal equal.
- let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in {
+ let Uses = [FPC], mayRaiseFPException = 1,
+ Predicates = [FeatureVectorEnhancements1] in {
defm VFKEDB : BinaryVRRcSPair<"vfkedb", 0xE7E8, null_frag, null_frag,
v128g, v128db, 3, 4>;
defm WFKEDB : BinaryVRRcSPair<"wfkedb", 0xE7E8, null_frag, null_frag,
}
// Compare high.
- let Uses = [FPC] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
def VFCH : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>;
defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs,
v128g, v128db, 3, 0>;
}
// Compare and signal high.
- let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in {
+ let Uses = [FPC], mayRaiseFPException = 1,
+ Predicates = [FeatureVectorEnhancements1] in {
defm VFKHDB : BinaryVRRcSPair<"vfkhdb", 0xE7EB, null_frag, null_frag,
v128g, v128db, 3, 4>;
defm WFKHDB : BinaryVRRcSPair<"wfkhdb", 0xE7EB, null_frag, null_frag,
}
// Compare high or equal.
- let Uses = [FPC] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
def VFCHE : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>;
defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes,
v128g, v128db, 3, 0>;
}
// Compare and signal high or equal.
- let Uses = [FPC], Predicates = [FeatureVectorEnhancements1] in {
+ let Uses = [FPC], mayRaiseFPException = 1,
+ Predicates = [FeatureVectorEnhancements1] in {
defm VFKHEDB : BinaryVRRcSPair<"vfkhedb", 0xE7EA, null_frag, null_frag,
v128g, v128db, 3, 4>;
defm WFKHEDB : BinaryVRRcSPair<"wfkhedb", 0xE7EA, null_frag, null_frag,
(sub node:$src1, node:$src2)]>;
// Fused multiply-subtract, using the natural operand order.
-def fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (fma node:$src1, node:$src2, (fneg node:$src3))>;
+def any_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (any_fma node:$src1, node:$src2, (fneg node:$src3))>;
// Fused multiply-add and multiply-subtract, but with the order of the
// operands matching SystemZ's MA and MS instructions.
-def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (fma node:$src2, node:$src3, node:$src1)>;
-def z_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (fma node:$src2, node:$src3, (fneg node:$src1))>;
+def z_any_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (any_fma node:$src2, node:$src3, node:$src1)>;
+def z_any_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (any_fma node:$src2, node:$src3, (fneg node:$src1))>;
// Negative fused multiply-add and multiply-subtract.
-def fnma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (fneg (fma node:$src1, node:$src2, node:$src3))>;
-def fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (fneg (fms node:$src1, node:$src2, node:$src3))>;
+def any_fnma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (fneg (any_fma node:$src1, node:$src2, node:$src3))>;
+def any_fnms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (fneg (any_fms node:$src1, node:$src2, node:$src3))>;
// Floating-point negative absolute.
def fnabs : PatFrag<(ops node:$ptr), (fneg (fabs node:$ptr))>;
--- /dev/null
+; Test 32-bit floating-point strict addition.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @foo()
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+
+; Check register addition.
+define float @f1(float %f1, float %f2) {
+; CHECK-LABEL: f1:
+; CHECK: aebr %f0, %f2
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.fadd.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check the low end of the AEB range.
+define float @f2(float %f1, float *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: aeb %f0, 0(%r2)
+; CHECK: br %r14
+ %f2 = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.fadd.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check the high end of the aligned AEB range.
+define float @f3(float %f1, float *%base) {
+; CHECK-LABEL: f3:
+; CHECK: aeb %f0, 4092(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 1023
+ %f2 = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.fadd.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define float @f4(float %f1, float *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: aeb %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 1024
+ %f2 = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.fadd.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define float @f5(float %f1, float *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -4
+; CHECK: aeb %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 -1
+ %f2 = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.fadd.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check that AEB allows indices.
+define float @f6(float %f1, float *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: aeb %f0, 400(%r1,%r2)
+; CHECK: br %r14
+ %ptr1 = getelementptr float, float *%base, i64 %index
+ %ptr2 = getelementptr float, float *%ptr1, i64 100
+ %f2 = load float, float *%ptr2
+ %res = call float @llvm.experimental.constrained.fadd.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check that additions of spilled values can use AEB rather than AEBR.
+define float @f7(float *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-SCALAR: aeb %f0, 16{{[04]}}(%r15)
+; CHECK: br %r14
+ %ptr1 = getelementptr float, float *%ptr0, i64 2
+ %ptr2 = getelementptr float, float *%ptr0, i64 4
+ %ptr3 = getelementptr float, float *%ptr0, i64 6
+ %ptr4 = getelementptr float, float *%ptr0, i64 8
+ %ptr5 = getelementptr float, float *%ptr0, i64 10
+ %ptr6 = getelementptr float, float *%ptr0, i64 12
+ %ptr7 = getelementptr float, float *%ptr0, i64 14
+ %ptr8 = getelementptr float, float *%ptr0, i64 16
+ %ptr9 = getelementptr float, float *%ptr0, i64 18
+ %ptr10 = getelementptr float, float *%ptr0, i64 20
+
+ %val0 = load float, float *%ptr0
+ %val1 = load float, float *%ptr1
+ %val2 = load float, float *%ptr2
+ %val3 = load float, float *%ptr3
+ %val4 = load float, float *%ptr4
+ %val5 = load float, float *%ptr5
+ %val6 = load float, float *%ptr6
+ %val7 = load float, float *%ptr7
+ %val8 = load float, float *%ptr8
+ %val9 = load float, float *%ptr9
+ %val10 = load float, float *%ptr10
+
+ %ret = call float @foo()
+
+ %add0 = call float @llvm.experimental.constrained.fadd.f32(
+ float %ret, float %val0,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %add1 = call float @llvm.experimental.constrained.fadd.f32(
+ float %add0, float %val1,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %add2 = call float @llvm.experimental.constrained.fadd.f32(
+ float %add1, float %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %add3 = call float @llvm.experimental.constrained.fadd.f32(
+ float %add2, float %val3,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %add4 = call float @llvm.experimental.constrained.fadd.f32(
+ float %add3, float %val4,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %add5 = call float @llvm.experimental.constrained.fadd.f32(
+ float %add4, float %val5,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %add6 = call float @llvm.experimental.constrained.fadd.f32(
+ float %add5, float %val6,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %add7 = call float @llvm.experimental.constrained.fadd.f32(
+ float %add6, float %val7,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %add8 = call float @llvm.experimental.constrained.fadd.f32(
+ float %add7, float %val8,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %add9 = call float @llvm.experimental.constrained.fadd.f32(
+ float %add8, float %val9,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %add10 = call float @llvm.experimental.constrained.fadd.f32(
+ float %add9, float %val10,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+
+ ret float %add10
+}
--- /dev/null
+; Test strict 64-bit floating-point addition.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs | FileCheck %s
+declare double @foo()
+declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
+
+; Check register addition.
+define double @f1(double %f1, double %f2) {
+; CHECK-LABEL: f1:
+; CHECK: adbr %f0, %f2
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.fadd.f64(
+ double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check the low end of the ADB range.
+define double @f2(double %f1, double *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: adb %f0, 0(%r2)
+; CHECK: br %r14
+ %f2 = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.fadd.f64(
+ double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check the high end of the aligned ADB range.
+define double @f3(double %f1, double *%base) {
+; CHECK-LABEL: f3:
+; CHECK: adb %f0, 4088(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 511
+ %f2 = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.fadd.f64(
+ double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f4(double %f1, double *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: adb %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 512
+ %f2 = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.fadd.f64(
+ double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define double @f5(double %f1, double *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -8
+; CHECK: adb %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 -1
+ %f2 = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.fadd.f64(
+ double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check that ADB allows indices.
+define double @f6(double %f1, double *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: adb %f0, 800(%r1,%r2)
+; CHECK: br %r14
+ %ptr1 = getelementptr double, double *%base, i64 %index
+ %ptr2 = getelementptr double, double *%ptr1, i64 100
+ %f2 = load double, double *%ptr2
+ %res = call double @llvm.experimental.constrained.fadd.f64(
+ double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check that additions of spilled values can use ADB rather than ADBR.
+define double @f7(double *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-SCALAR: adb %f0, 160(%r15)
+; CHECK: br %r14
+ %ptr1 = getelementptr double, double *%ptr0, i64 2
+ %ptr2 = getelementptr double, double *%ptr0, i64 4
+ %ptr3 = getelementptr double, double *%ptr0, i64 6
+ %ptr4 = getelementptr double, double *%ptr0, i64 8
+ %ptr5 = getelementptr double, double *%ptr0, i64 10
+ %ptr6 = getelementptr double, double *%ptr0, i64 12
+ %ptr7 = getelementptr double, double *%ptr0, i64 14
+ %ptr8 = getelementptr double, double *%ptr0, i64 16
+ %ptr9 = getelementptr double, double *%ptr0, i64 18
+ %ptr10 = getelementptr double, double *%ptr0, i64 20
+
+ %val0 = load double, double *%ptr0
+ %val1 = load double, double *%ptr1
+ %val2 = load double, double *%ptr2
+ %val3 = load double, double *%ptr3
+ %val4 = load double, double *%ptr4
+ %val5 = load double, double *%ptr5
+ %val6 = load double, double *%ptr6
+ %val7 = load double, double *%ptr7
+ %val8 = load double, double *%ptr8
+ %val9 = load double, double *%ptr9
+ %val10 = load double, double *%ptr10
+
+ %ret = call double @foo()
+
+ %add0 = call double @llvm.experimental.constrained.fadd.f64(
+ double %ret, double %val0,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %add1 = call double @llvm.experimental.constrained.fadd.f64(
+ double %add0, double %val1,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %add2 = call double @llvm.experimental.constrained.fadd.f64(
+ double %add1, double %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %add3 = call double @llvm.experimental.constrained.fadd.f64(
+ double %add2, double %val3,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %add4 = call double @llvm.experimental.constrained.fadd.f64(
+ double %add3, double %val4,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %add5 = call double @llvm.experimental.constrained.fadd.f64(
+ double %add4, double %val5,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %add6 = call double @llvm.experimental.constrained.fadd.f64(
+ double %add5, double %val6,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %add7 = call double @llvm.experimental.constrained.fadd.f64(
+ double %add6, double %val7,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %add8 = call double @llvm.experimental.constrained.fadd.f64(
+ double %add7, double %val8,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %add9 = call double @llvm.experimental.constrained.fadd.f64(
+ double %add8, double %val9,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %add10 = call double @llvm.experimental.constrained.fadd.f64(
+ double %add9, double %val10,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+
+ ret double %add10
+}
--- /dev/null
+; Test strict 128-bit floating-point addition.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fadd.f128(fp128, fp128, metadata, metadata)
+
+; There is no memory form of 128-bit addition.
+define void @f1(fp128 *%ptr, float %f2) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: lxebr %f0, %f0
+; CHECK-DAG: ld %f1, 0(%r2)
+; CHECK-DAG: ld %f3, 8(%r2)
+; CHECK: axbr %f0, %f1
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+ %f1 = load fp128, fp128 *%ptr
+ %f2x = fpext float %f2 to fp128
+ %sum = call fp128 @llvm.experimental.constrained.fadd.f128(
+ fp128 %f1, fp128 %f2x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %sum, fp128 *%ptr
+ ret void
+}
--- /dev/null
+; Test strict 128-bit floating-point addition on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fadd.f128(fp128, fp128, metadata, metadata)
+
+define void @f1(fp128 *%ptr1, fp128 *%ptr2) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
+; CHECK: wfaxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+ %f1 = load fp128, fp128 *%ptr1
+ %f2 = load fp128, fp128 *%ptr2
+ %sum = call fp128 @llvm.experimental.constrained.fadd.f128(
+ fp128 %f1, fp128 %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %sum, fp128 *%ptr1
+ ret void
+}
--- /dev/null
+; Verify that strict FP operations are not rescheduled
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
+declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
+declare float @llvm.sqrt.f32(float)
+declare void @llvm.s390.sfpc(i32)
+
+; For non-strict operations, we expect the post-RA scheduler to
+; separate the two square root instructions on z13.
+define void @f1(float %f1, float %f2, float %f3, float %f4, float *%ptr0) {
+; CHECK-LABEL: f1:
+; CHECK: sqebr
+; CHECK: {{aebr|sebr}}
+; CHECK: sqebr
+; CHECK: br %r14
+
+ %add = fadd float %f1, %f2
+ %sub = fsub float %f3, %f4
+ %sqrt1 = call float @llvm.sqrt.f32(float %f2)
+ %sqrt2 = call float @llvm.sqrt.f32(float %f4)
+
+ %ptr1 = getelementptr float, float *%ptr0, i64 1
+ %ptr2 = getelementptr float, float *%ptr0, i64 2
+ %ptr3 = getelementptr float, float *%ptr0, i64 3
+
+ store float %add, float *%ptr0
+ store float %sub, float *%ptr1
+ store float %sqrt1, float *%ptr2
+ store float %sqrt2, float *%ptr3
+
+ ret void
+}
+
+; But for strict operations, this must not happen.
+define void @f2(float %f1, float %f2, float %f3, float %f4, float *%ptr0) {
+; CHECK-LABEL: f2:
+; CHECK: {{aebr|sebr}}
+; CHECK: {{aebr|sebr}}
+; CHECK: sqebr
+; CHECK: sqebr
+; CHECK: br %r14
+
+ %add = call float @llvm.experimental.constrained.fadd.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %sub = call float @llvm.experimental.constrained.fsub.f32(
+ float %f3, float %f4,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %sqrt1 = call float @llvm.experimental.constrained.sqrt.f32(
+ float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %sqrt2 = call float @llvm.experimental.constrained.sqrt.f32(
+ float %f4,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+
+ %ptr1 = getelementptr float, float *%ptr0, i64 1
+ %ptr2 = getelementptr float, float *%ptr0, i64 2
+ %ptr3 = getelementptr float, float *%ptr0, i64 3
+
+ store float %add, float *%ptr0
+ store float %sub, float *%ptr1
+ store float %sqrt1, float *%ptr2
+ store float %sqrt2, float *%ptr3
+
+ ret void
+}
+
+; On the other hand, strict operations that use the fpexcept.ignore
+; exception behaviour should be scheduled freely.
+define void @f3(float %f1, float %f2, float %f3, float %f4, float *%ptr0) {
+; CHECK-LABEL: f3:
+; CHECK: sqebr
+; CHECK: {{aebr|sebr}}
+; CHECK: sqebr
+; CHECK: br %r14
+
+ %add = call float @llvm.experimental.constrained.fadd.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.ignore")
+ %sub = call float @llvm.experimental.constrained.fsub.f32(
+ float %f3, float %f4,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.ignore")
+ %sqrt1 = call float @llvm.experimental.constrained.sqrt.f32(
+ float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.ignore")
+ %sqrt2 = call float @llvm.experimental.constrained.sqrt.f32(
+ float %f4,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.ignore")
+
+ %ptr1 = getelementptr float, float *%ptr0, i64 1
+ %ptr2 = getelementptr float, float *%ptr0, i64 2
+ %ptr3 = getelementptr float, float *%ptr0, i64 3
+
+ store float %add, float *%ptr0
+ store float %sub, float *%ptr1
+ store float %sqrt1, float *%ptr2
+ store float %sqrt2, float *%ptr3
+
+ ret void
+}
+
+; However, even non-strict operations must not be scheduled across an SFPC.
+define void @f4(float %f1, float %f2, float %f3, float %f4, float *%ptr0) {
+; CHECK-LABEL: f4:
+; CHECK: {{aebr|sebr}}
+; CHECK: {{aebr|sebr}}
+; CHECK: sfpc
+; CHECK: sqebr
+; CHECK: sqebr
+; CHECK: br %r14
+
+ %add = fadd float %f1, %f2
+ %sub = fsub float %f3, %f4
+ call void @llvm.s390.sfpc(i32 0)
+ %sqrt1 = call float @llvm.sqrt.f32(float %f2)
+ %sqrt2 = call float @llvm.sqrt.f32(float %f4)
+
+ %ptr1 = getelementptr float, float *%ptr0, i64 1
+ %ptr2 = getelementptr float, float *%ptr0, i64 2
+ %ptr3 = getelementptr float, float *%ptr0, i64 3
+
+ store float %add, float *%ptr0
+ store float %sub, float *%ptr1
+ store float %sqrt1, float *%ptr2
+ store float %sqrt2, float *%ptr3
+
+ ret void
+}
+
--- /dev/null
+; Test strict floating-point truncations.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
+
+declare float @llvm.experimental.constrained.fptrunc.f32.f64(double, metadata, metadata)
+declare float @llvm.experimental.constrained.fptrunc.f32.f128(fp128, metadata, metadata)
+declare double @llvm.experimental.constrained.fptrunc.f64.f128(fp128, metadata, metadata)
+
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
+
+; Test f64->f32.
+define float @f1(double %d1, double %d2) {
+; CHECK-LABEL: f1:
+; CHECK-SCALAR: ledbr %f0, %f2
+; CHECK-VECTOR: ledbra %f0, 0, %f2, 0
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.fptrunc.f32.f64(
+ double %d2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Test f128->f32.
+define float @f2(fp128 *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: lexbr %f0, %f0
+; CHECK: br %r14
+ %val = load fp128, fp128 *%ptr
+ %res = call float @llvm.experimental.constrained.fptrunc.f32.f128(
+ fp128 %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Make sure that we don't use %f0 as the destination of LEXBR when %f2
+; is still live.
+define void @f3(float *%dst, fp128 *%ptr, float %d1, float %d2) {
+; CHECK-LABEL: f3:
+; CHECK: lexbr %f1, %f1
+; CHECK: aebr %f1, %f2
+; CHECK: ste %f1, 0(%r2)
+; CHECK: br %r14
+ %val = load fp128, fp128 *%ptr
+ %conv = call float @llvm.experimental.constrained.fptrunc.f32.f128(
+ fp128 %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %res = call float @llvm.experimental.constrained.fadd.f32(
+ float %conv, float %d2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store float %res, float *%dst
+ ret void
+}
+
+; Test f128->f64.
+define double @f4(fp128 *%ptr) {
+; CHECK-LABEL: f4:
+; CHECK: ldxbr %f0, %f0
+; CHECK: br %r14
+ %val = load fp128, fp128 *%ptr
+ %res = call double @llvm.experimental.constrained.fptrunc.f64.f128(
+ fp128 %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Like f3, but for f128->f64.
+define void @f5(double *%dst, fp128 *%ptr, double %d1, double %d2) {
+; CHECK-LABEL: f5:
+; CHECK: ldxbr %f1, %f1
+; CHECK-SCALAR: adbr %f1, %f2
+; CHECK-SCALAR: std %f1, 0(%r2)
+; CHECK-VECTOR: wfadb [[REG:%f[0-9]+]], %f1, %f2
+; CHECK-VECTOR: std [[REG]], 0(%r2)
+; CHECK: br %r14
+ %val = load fp128, fp128 *%ptr
+ %conv = call double @llvm.experimental.constrained.fptrunc.f64.f128(
+ fp128 %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %res = call double @llvm.experimental.constrained.fadd.f64(
+ double %conv, double %d2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store double %res, double *%dst
+ ret void
+}
--- /dev/null
+; Test strict extensions of f32 to f64.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
+
+declare double @llvm.experimental.constrained.fpext.f64.f32(float, metadata)
+
+; Check register extension.
+define double @f1(float %val) {
+; CHECK-LABEL: f1:
+; CHECK: ldebr %f0, %f0
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.fpext.f64.f32(float %val,
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check extension from memory.
+; FIXME: This should really use LDEB, but there is no strict "extload" yet.
+define double @f2(float *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK-SCALAR: le %f0, 0(%r2)
+; CHECK-VECTOR: lde %f0, 0(%r2)
+; CHECK: ldebr %f0, %f0
+; CHECK: br %r14
+ %val = load float, float *%ptr
+ %res = call double @llvm.experimental.constrained.fpext.f64.f32(float %val,
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
--- /dev/null
+; Test strict extensions of f32 to f128.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fpext.f128.f32(float, metadata)
+
+; Check register extension.
+define void @f1(fp128 *%dst, float %val) {
+; CHECK-LABEL: f1:
+; CHECK: lxebr %f0, %f0
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+ %res = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %val,
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%dst
+ ret void
+}
+
+; Check extension from memory.
+; FIXME: This should really use LXEB, but there is no strict "extload" yet.
+define void @f2(fp128 *%dst, float *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: le %f0, 0(%r3)
+; CHECK: lxebr %f0, %f0
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+ %val = load float, float *%ptr
+ %res = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %val,
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%dst
+ ret void
+}
+
--- /dev/null
+; Test strict extensions of f64 to f128.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fpext.f128.f64(double, metadata)
+
+; Check register extension.
+define void @f1(fp128 *%dst, double %val) {
+; CHECK-LABEL: f1:
+; CHECK: lxdbr %f0, %f0
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+ %res = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %val,
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%dst
+ ret void
+}
+
+; Check extension from memory.
+; FIXME: This should really use LXDB, but there is no strict "extload" yet.
+define void @f2(fp128 *%dst, double *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: ld %f0, 0(%r3)
+; CHECK: lxdbr %f0, %f0
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+ %val = load double, double *%ptr
+ %res = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %val,
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%dst
+ ret void
+}
+
--- /dev/null
+; Test strict f128 floating-point truncations/extensions on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @llvm.experimental.constrained.fptrunc.f32.f128(fp128, metadata, metadata)
+declare double @llvm.experimental.constrained.fptrunc.f64.f128(fp128, metadata, metadata)
+
+declare fp128 @llvm.experimental.constrained.fpext.f128.f32(float, metadata)
+declare fp128 @llvm.experimental.constrained.fpext.f128.f64(double, metadata)
+
+; Test f128->f64.
+define double @f1(fp128 *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: vl [[REG:%v[0-9]+]], 0(%r2)
+; CHECK: wflrx %f0, [[REG]], 0, 0
+; CHECK: br %r14
+ %val = load fp128, fp128 *%ptr
+ %res = call double @llvm.experimental.constrained.fptrunc.f64.f128(
+ fp128 %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Test f128->f32.
+define float @f2(fp128 *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: vl [[REG:%v[0-9]+]], 0(%r2)
+; CHECK: wflrx %f0, [[REG]], 0, 3
+; CHECK: ledbra %f0, 0, %f0, 0
+; CHECK: br %r14
+ %val = load fp128, fp128 *%ptr
+ %res = call float @llvm.experimental.constrained.fptrunc.f32.f128(
+ fp128 %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Test f64->f128.
+define void @f3(fp128 *%dst, double %val) {
+; CHECK-LABEL: f3:
+; CHECK: wflld [[RES:%v[0-9]+]], %f0
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+ %res = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %val,
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%dst
+ ret void
+}
+
+; Test f32->f128.
+define void @f4(fp128 *%dst, float %val) {
+; CHECK-LABEL: f4:
+; CHECK: ldebr %f0, %f0
+; CHECK: wflld [[RES:%v[0-9]+]], %f0
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+ %res = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %val,
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%dst
+ ret void
+}
+
--- /dev/null
+; Test strict 32-bit floating-point division.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @foo()
+declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata)
+
+; Check register division.
+define float @f1(float %f1, float %f2) {
+; CHECK-LABEL: f1:
+; CHECK: debr %f0, %f2
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.fdiv.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check the low end of the DEB range.
+define float @f2(float %f1, float *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: deb %f0, 0(%r2)
+; CHECK: br %r14
+ %f2 = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.fdiv.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check the high end of the aligned DEB range.
+define float @f3(float %f1, float *%base) {
+; CHECK-LABEL: f3:
+; CHECK: deb %f0, 4092(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 1023
+ %f2 = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.fdiv.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define float @f4(float %f1, float *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: deb %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 1024
+ %f2 = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.fdiv.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define float @f5(float %f1, float *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -4
+; CHECK: deb %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 -1
+ %f2 = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.fdiv.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check that DEB allows indices.
+define float @f6(float %f1, float *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: deb %f0, 400(%r1,%r2)
+; CHECK: br %r14
+ %ptr1 = getelementptr float, float *%base, i64 %index
+ %ptr2 = getelementptr float, float *%ptr1, i64 100
+ %f2 = load float, float *%ptr2
+ %res = call float @llvm.experimental.constrained.fdiv.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check that divisions of spilled values can use DEB rather than DEBR.
+define float @f7(float *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-SCALAR: deb %f0, 16{{[04]}}(%r15)
+; CHECK: br %r14
+ %ptr1 = getelementptr float, float *%ptr0, i64 2
+ %ptr2 = getelementptr float, float *%ptr0, i64 4
+ %ptr3 = getelementptr float, float *%ptr0, i64 6
+ %ptr4 = getelementptr float, float *%ptr0, i64 8
+ %ptr5 = getelementptr float, float *%ptr0, i64 10
+ %ptr6 = getelementptr float, float *%ptr0, i64 12
+ %ptr7 = getelementptr float, float *%ptr0, i64 14
+ %ptr8 = getelementptr float, float *%ptr0, i64 16
+ %ptr9 = getelementptr float, float *%ptr0, i64 18
+ %ptr10 = getelementptr float, float *%ptr0, i64 20
+
+ %val0 = load float, float *%ptr0
+ %val1 = load float, float *%ptr1
+ %val2 = load float, float *%ptr2
+ %val3 = load float, float *%ptr3
+ %val4 = load float, float *%ptr4
+ %val5 = load float, float *%ptr5
+ %val6 = load float, float *%ptr6
+ %val7 = load float, float *%ptr7
+ %val8 = load float, float *%ptr8
+ %val9 = load float, float *%ptr9
+ %val10 = load float, float *%ptr10
+
+ %ret = call float @foo()
+
+ %div0 = call float @llvm.experimental.constrained.fdiv.f32(
+ float %ret, float %val0,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %div1 = call float @llvm.experimental.constrained.fdiv.f32(
+ float %div0, float %val1,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %div2 = call float @llvm.experimental.constrained.fdiv.f32(
+ float %div1, float %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %div3 = call float @llvm.experimental.constrained.fdiv.f32(
+ float %div2, float %val3,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %div4 = call float @llvm.experimental.constrained.fdiv.f32(
+ float %div3, float %val4,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %div5 = call float @llvm.experimental.constrained.fdiv.f32(
+ float %div4, float %val5,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %div6 = call float @llvm.experimental.constrained.fdiv.f32(
+ float %div5, float %val6,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %div7 = call float @llvm.experimental.constrained.fdiv.f32(
+ float %div6, float %val7,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %div8 = call float @llvm.experimental.constrained.fdiv.f32(
+ float %div7, float %val8,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %div9 = call float @llvm.experimental.constrained.fdiv.f32(
+ float %div8, float %val9,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %div10 = call float @llvm.experimental.constrained.fdiv.f32(
+ float %div9, float %val10,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+
+ ret float %div10
+}
--- /dev/null
+; Test strict 64-bit floating-point division.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare double @foo()
+declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
+
+; Check register division.
+define double @f1(double %f1, double %f2) {
+; CHECK-LABEL: f1:
+; CHECK: ddbr %f0, %f2
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.fdiv.f64(
+ double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check the low end of the DDB range.
+define double @f2(double %f1, double *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: ddb %f0, 0(%r2)
+; CHECK: br %r14
+ %f2 = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.fdiv.f64(
+ double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check the high end of the aligned DDB range.
+define double @f3(double %f1, double *%base) {
+; CHECK-LABEL: f3:
+; CHECK: ddb %f0, 4088(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 511
+ %f2 = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.fdiv.f64(
+ double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f4(double %f1, double *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: ddb %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 512
+ %f2 = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.fdiv.f64(
+ double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define double @f5(double %f1, double *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -8
+; CHECK: ddb %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 -1
+ %f2 = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.fdiv.f64(
+ double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check that DDB allows indices.
+define double @f6(double %f1, double *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: ddb %f0, 800(%r1,%r2)
+; CHECK: br %r14
+ %ptr1 = getelementptr double, double *%base, i64 %index
+ %ptr2 = getelementptr double, double *%ptr1, i64 100
+ %f2 = load double, double *%ptr2
+ %res = call double @llvm.experimental.constrained.fdiv.f64(
+ double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check that divisions of spilled values can use DDB rather than DDBR.
+define double @f7(double *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-SCALAR: ddb %f0, 160(%r15)
+; CHECK: br %r14
+ %ptr1 = getelementptr double, double *%ptr0, i64 2
+ %ptr2 = getelementptr double, double *%ptr0, i64 4
+ %ptr3 = getelementptr double, double *%ptr0, i64 6
+ %ptr4 = getelementptr double, double *%ptr0, i64 8
+ %ptr5 = getelementptr double, double *%ptr0, i64 10
+ %ptr6 = getelementptr double, double *%ptr0, i64 12
+ %ptr7 = getelementptr double, double *%ptr0, i64 14
+ %ptr8 = getelementptr double, double *%ptr0, i64 16
+ %ptr9 = getelementptr double, double *%ptr0, i64 18
+ %ptr10 = getelementptr double, double *%ptr0, i64 20
+
+ %val0 = load double, double *%ptr0
+ %val1 = load double, double *%ptr1
+ %val2 = load double, double *%ptr2
+ %val3 = load double, double *%ptr3
+ %val4 = load double, double *%ptr4
+ %val5 = load double, double *%ptr5
+ %val6 = load double, double *%ptr6
+ %val7 = load double, double *%ptr7
+ %val8 = load double, double *%ptr8
+ %val9 = load double, double *%ptr9
+ %val10 = load double, double *%ptr10
+
+ %ret = call double @foo()
+
+ %div0 = call double @llvm.experimental.constrained.fdiv.f64(
+ double %ret, double %val0,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %div1 = call double @llvm.experimental.constrained.fdiv.f64(
+ double %div0, double %val1,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %div2 = call double @llvm.experimental.constrained.fdiv.f64(
+ double %div1, double %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %div3 = call double @llvm.experimental.constrained.fdiv.f64(
+ double %div2, double %val3,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %div4 = call double @llvm.experimental.constrained.fdiv.f64(
+ double %div3, double %val4,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %div5 = call double @llvm.experimental.constrained.fdiv.f64(
+ double %div4, double %val5,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %div6 = call double @llvm.experimental.constrained.fdiv.f64(
+ double %div5, double %val6,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %div7 = call double @llvm.experimental.constrained.fdiv.f64(
+ double %div6, double %val7,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %div8 = call double @llvm.experimental.constrained.fdiv.f64(
+ double %div7, double %val8,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %div9 = call double @llvm.experimental.constrained.fdiv.f64(
+ double %div8, double %val9,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %div10 = call double @llvm.experimental.constrained.fdiv.f64(
+ double %div9, double %val10,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+
+ ret double %div10
+}
--- /dev/null
+; Test strict 128-bit floating-point division.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fdiv.f128(fp128, fp128, metadata, metadata)
+
+; There is no memory form of 128-bit division.
+define void @f1(fp128 *%ptr, float %f2) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: lxebr %f0, %f0
+; CHECK-DAG: ld %f1, 0(%r2)
+; CHECK-DAG: ld %f3, 8(%r2)
+; CHECK: dxbr %f1, %f0
+; CHECK: std %f1, 0(%r2)
+; CHECK: std %f3, 8(%r2)
+; CHECK: br %r14
+ %f1 = load fp128, fp128 *%ptr
+ %f2x = fpext float %f2 to fp128
+ %sum = call fp128 @llvm.experimental.constrained.fdiv.f128(
+ fp128 %f1, fp128 %f2x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %sum, fp128 *%ptr
+ ret void
+}
--- /dev/null
+; Test strict 128-bit floating-point division on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fdiv.f128(fp128, fp128, metadata, metadata)
+
+define void @f1(fp128 *%ptr1, fp128 *%ptr2) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
+; CHECK: wfdxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+ %f1 = load fp128, fp128 *%ptr1
+ %f2 = load fp128, fp128 *%ptr2
+ %sum = call fp128 @llvm.experimental.constrained.fdiv.f128(
+ fp128 %f1, fp128 %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %sum, fp128 *%ptr1
+ ret void
+}
--- /dev/null
+; Test strict multiplication of two f32s, producing an f32 result.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @foo()
+declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
+
+; Check register multiplication.
+define float @f1(float %f1, float %f2) {
+; CHECK-LABEL: f1:
+; CHECK: meebr %f0, %f2
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.fmul.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check the low end of the MEEB range.
+define float @f2(float %f1, float *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: meeb %f0, 0(%r2)
+; CHECK: br %r14
+ %f2 = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.fmul.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check the high end of the aligned MEEB range.
+define float @f3(float %f1, float *%base) {
+; CHECK-LABEL: f3:
+; CHECK: meeb %f0, 4092(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 1023
+ %f2 = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.fmul.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define float @f4(float %f1, float *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: meeb %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 1024
+ %f2 = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.fmul.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define float @f5(float %f1, float *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -4
+; CHECK: meeb %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 -1
+ %f2 = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.fmul.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check that MEEB allows indices.
+define float @f6(float %f1, float *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: meeb %f0, 400(%r1,%r2)
+; CHECK: br %r14
+ %ptr1 = getelementptr float, float *%base, i64 %index
+ %ptr2 = getelementptr float, float *%ptr1, i64 100
+ %f2 = load float, float *%ptr2
+ %res = call float @llvm.experimental.constrained.fmul.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check that multiplications of spilled values can use MEEB rather than MEEBR.
+define float @f7(float *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-SCALAR: meeb %f0, 16{{[04]}}(%r15)
+; CHECK: br %r14
+ %ptr1 = getelementptr float, float *%ptr0, i64 2
+ %ptr2 = getelementptr float, float *%ptr0, i64 4
+ %ptr3 = getelementptr float, float *%ptr0, i64 6
+ %ptr4 = getelementptr float, float *%ptr0, i64 8
+ %ptr5 = getelementptr float, float *%ptr0, i64 10
+ %ptr6 = getelementptr float, float *%ptr0, i64 12
+ %ptr7 = getelementptr float, float *%ptr0, i64 14
+ %ptr8 = getelementptr float, float *%ptr0, i64 16
+ %ptr9 = getelementptr float, float *%ptr0, i64 18
+ %ptr10 = getelementptr float, float *%ptr0, i64 20
+
+ %val0 = load float, float *%ptr0
+ %val1 = load float, float *%ptr1
+ %val2 = load float, float *%ptr2
+ %val3 = load float, float *%ptr3
+ %val4 = load float, float *%ptr4
+ %val5 = load float, float *%ptr5
+ %val6 = load float, float *%ptr6
+ %val7 = load float, float *%ptr7
+ %val8 = load float, float *%ptr8
+ %val9 = load float, float *%ptr9
+ %val10 = load float, float *%ptr10
+
+ %ret = call float @foo()
+
+ %mul0 = call float @llvm.experimental.constrained.fmul.f32(
+ float %ret, float %val0,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %mul1 = call float @llvm.experimental.constrained.fmul.f32(
+ float %mul0, float %val1,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %mul2 = call float @llvm.experimental.constrained.fmul.f32(
+ float %mul1, float %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %mul3 = call float @llvm.experimental.constrained.fmul.f32(
+ float %mul2, float %val3,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %mul4 = call float @llvm.experimental.constrained.fmul.f32(
+ float %mul3, float %val4,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %mul5 = call float @llvm.experimental.constrained.fmul.f32(
+ float %mul4, float %val5,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %mul6 = call float @llvm.experimental.constrained.fmul.f32(
+ float %mul5, float %val6,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %mul7 = call float @llvm.experimental.constrained.fmul.f32(
+ float %mul6, float %val7,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %mul8 = call float @llvm.experimental.constrained.fmul.f32(
+ float %mul7, float %val8,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %mul9 = call float @llvm.experimental.constrained.fmul.f32(
+ float %mul8, float %val9,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %mul10 = call float @llvm.experimental.constrained.fmul.f32(
+ float %mul9, float %val10,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+
+ ret float %mul10
+}
--- /dev/null
+; Test strict multiplication of two f32s, producing an f64 result.
+; FIXME: we do not have a strict version of fpext yet
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare float @foo()
+declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
+
+; Check register multiplication.
+define double @f1(float %f1, float %f2) {
+; CHECK-LABEL: f1:
+; CHECK: mdebr %f0, %f2
+; CHECK: br %r14
+ %f1x = fpext float %f1 to double
+ %f2x = fpext float %f2 to double
+ %res = call double @llvm.experimental.constrained.fmul.f64(
+ double %f1x, double %f2x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check the low end of the MDEB range.
+define double @f2(float %f1, float *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: mdeb %f0, 0(%r2)
+; CHECK: br %r14
+ %f2 = load float, float *%ptr
+ %f1x = fpext float %f1 to double
+ %f2x = fpext float %f2 to double
+ %res = call double @llvm.experimental.constrained.fmul.f64(
+ double %f1x, double %f2x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check the high end of the aligned MDEB range.
+define double @f3(float %f1, float *%base) {
+; CHECK-LABEL: f3:
+; CHECK: mdeb %f0, 4092(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 1023
+ %f2 = load float, float *%ptr
+ %f1x = fpext float %f1 to double
+ %f2x = fpext float %f2 to double
+ %res = call double @llvm.experimental.constrained.fmul.f64(
+ double %f1x, double %f2x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f4(float %f1, float *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: mdeb %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 1024
+ %f2 = load float, float *%ptr
+ %f1x = fpext float %f1 to double
+ %f2x = fpext float %f2 to double
+ %res = call double @llvm.experimental.constrained.fmul.f64(
+ double %f1x, double %f2x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define double @f5(float %f1, float *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -4
+; CHECK: mdeb %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 -1
+ %f2 = load float, float *%ptr
+ %f1x = fpext float %f1 to double
+ %f2x = fpext float %f2 to double
+ %res = call double @llvm.experimental.constrained.fmul.f64(
+ double %f1x, double %f2x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check that MDEB allows indices.
+define double @f6(float %f1, float *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: mdeb %f0, 400(%r1,%r2)
+; CHECK: br %r14
+ %ptr1 = getelementptr float, float *%base, i64 %index
+ %ptr2 = getelementptr float, float *%ptr1, i64 100
+ %f2 = load float, float *%ptr2
+ %f1x = fpext float %f1 to double
+ %f2x = fpext float %f2 to double
+ %res = call double @llvm.experimental.constrained.fmul.f64(
+ double %f1x, double %f2x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check that multiplications of spilled values can use MDEB rather than MDEBR.
+define float @f7(float *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: mdeb %f0, 16{{[04]}}(%r15)
+; CHECK: br %r14
+ %ptr1 = getelementptr float, float *%ptr0, i64 2
+ %ptr2 = getelementptr float, float *%ptr0, i64 4
+ %ptr3 = getelementptr float, float *%ptr0, i64 6
+ %ptr4 = getelementptr float, float *%ptr0, i64 8
+ %ptr5 = getelementptr float, float *%ptr0, i64 10
+ %ptr6 = getelementptr float, float *%ptr0, i64 12
+ %ptr7 = getelementptr float, float *%ptr0, i64 14
+ %ptr8 = getelementptr float, float *%ptr0, i64 16
+ %ptr9 = getelementptr float, float *%ptr0, i64 18
+ %ptr10 = getelementptr float, float *%ptr0, i64 20
+
+ %val0 = load float, float *%ptr0
+ %val1 = load float, float *%ptr1
+ %val2 = load float, float *%ptr2
+ %val3 = load float, float *%ptr3
+ %val4 = load float, float *%ptr4
+ %val5 = load float, float *%ptr5
+ %val6 = load float, float *%ptr6
+ %val7 = load float, float *%ptr7
+ %val8 = load float, float *%ptr8
+ %val9 = load float, float *%ptr9
+ %val10 = load float, float *%ptr10
+
+ %frob0 = fadd float %val0, %val0
+ %frob1 = fadd float %val1, %val1
+ %frob2 = fadd float %val2, %val2
+ %frob3 = fadd float %val3, %val3
+ %frob4 = fadd float %val4, %val4
+ %frob5 = fadd float %val5, %val5
+ %frob6 = fadd float %val6, %val6
+ %frob7 = fadd float %val7, %val7
+ %frob8 = fadd float %val8, %val8
+ %frob9 = fadd float %val9, %val9
+  %frob10 = fadd float %val10, %val10
+
+ store float %frob0, float *%ptr0
+ store float %frob1, float *%ptr1
+ store float %frob2, float *%ptr2
+ store float %frob3, float *%ptr3
+ store float %frob4, float *%ptr4
+ store float %frob5, float *%ptr5
+ store float %frob6, float *%ptr6
+ store float %frob7, float *%ptr7
+ store float %frob8, float *%ptr8
+ store float %frob9, float *%ptr9
+ store float %frob10, float *%ptr10
+
+ %ret = call float @foo()
+
+ %accext0 = fpext float %ret to double
+ %ext0 = fpext float %frob0 to double
+ %mul0 = call double @llvm.experimental.constrained.fmul.f64(
+ double %accext0, double %ext0,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %extra0 = call double @llvm.experimental.constrained.fmul.f64(
+ double %mul0, double 1.01,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %trunc0 = fptrunc double %extra0 to float
+
+ %accext1 = fpext float %trunc0 to double
+ %ext1 = fpext float %frob1 to double
+ %mul1 = call double @llvm.experimental.constrained.fmul.f64(
+ double %accext1, double %ext1,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %extra1 = call double @llvm.experimental.constrained.fmul.f64(
+ double %mul1, double 1.11,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %trunc1 = fptrunc double %extra1 to float
+
+ %accext2 = fpext float %trunc1 to double
+ %ext2 = fpext float %frob2 to double
+ %mul2 = call double @llvm.experimental.constrained.fmul.f64(
+ double %accext2, double %ext2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %extra2 = call double @llvm.experimental.constrained.fmul.f64(
+ double %mul2, double 1.21,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %trunc2 = fptrunc double %extra2 to float
+
+ %accext3 = fpext float %trunc2 to double
+ %ext3 = fpext float %frob3 to double
+ %mul3 = call double @llvm.experimental.constrained.fmul.f64(
+ double %accext3, double %ext3,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %extra3 = call double @llvm.experimental.constrained.fmul.f64(
+ double %mul3, double 1.31,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %trunc3 = fptrunc double %extra3 to float
+
+ %accext4 = fpext float %trunc3 to double
+ %ext4 = fpext float %frob4 to double
+ %mul4 = call double @llvm.experimental.constrained.fmul.f64(
+ double %accext4, double %ext4,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %extra4 = call double @llvm.experimental.constrained.fmul.f64(
+ double %mul4, double 1.41,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %trunc4 = fptrunc double %extra4 to float
+
+ %accext5 = fpext float %trunc4 to double
+ %ext5 = fpext float %frob5 to double
+ %mul5 = call double @llvm.experimental.constrained.fmul.f64(
+ double %accext5, double %ext5,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %extra5 = call double @llvm.experimental.constrained.fmul.f64(
+ double %mul5, double 1.51,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %trunc5 = fptrunc double %extra5 to float
+
+ %accext6 = fpext float %trunc5 to double
+ %ext6 = fpext float %frob6 to double
+ %mul6 = call double @llvm.experimental.constrained.fmul.f64(
+ double %accext6, double %ext6,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %extra6 = call double @llvm.experimental.constrained.fmul.f64(
+ double %mul6, double 1.61,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %trunc6 = fptrunc double %extra6 to float
+
+ %accext7 = fpext float %trunc6 to double
+ %ext7 = fpext float %frob7 to double
+ %mul7 = call double @llvm.experimental.constrained.fmul.f64(
+ double %accext7, double %ext7,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %extra7 = call double @llvm.experimental.constrained.fmul.f64(
+ double %mul7, double 1.71,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %trunc7 = fptrunc double %extra7 to float
+
+ %accext8 = fpext float %trunc7 to double
+ %ext8 = fpext float %frob8 to double
+ %mul8 = call double @llvm.experimental.constrained.fmul.f64(
+ double %accext8, double %ext8,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %extra8 = call double @llvm.experimental.constrained.fmul.f64(
+ double %mul8, double 1.81,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %trunc8 = fptrunc double %extra8 to float
+
+ %accext9 = fpext float %trunc8 to double
+ %ext9 = fpext float %frob9 to double
+ %mul9 = call double @llvm.experimental.constrained.fmul.f64(
+ double %accext9, double %ext9,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %extra9 = call double @llvm.experimental.constrained.fmul.f64(
+ double %mul9, double 1.91,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %trunc9 = fptrunc double %extra9 to float
+
+ ret float %trunc9
+}
--- /dev/null
+; Test strict multiplication of two f64s, producing an f64 result.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare double @foo()
+declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
+
+; Check register multiplication.
+define double @f1(double %f1, double %f2) {
+; CHECK-LABEL: f1:
+; CHECK: mdbr %f0, %f2
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.fmul.f64(
+ double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check the low end of the MDB range.
+define double @f2(double %f1, double *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: mdb %f0, 0(%r2)
+; CHECK: br %r14
+ %f2 = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.fmul.f64(
+ double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check the high end of the aligned MDB range.
+define double @f3(double %f1, double *%base) {
+; CHECK-LABEL: f3:
+; CHECK: mdb %f0, 4088(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 511
+ %f2 = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.fmul.f64(
+ double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f4(double %f1, double *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: mdb %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 512
+ %f2 = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.fmul.f64(
+ double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define double @f5(double %f1, double *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -8
+; CHECK: mdb %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 -1
+ %f2 = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.fmul.f64(
+ double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check that MDB allows indices.
+define double @f6(double %f1, double *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: mdb %f0, 800(%r1,%r2)
+; CHECK: br %r14
+ %ptr1 = getelementptr double, double *%base, i64 %index
+ %ptr2 = getelementptr double, double *%ptr1, i64 100
+ %f2 = load double, double *%ptr2
+ %res = call double @llvm.experimental.constrained.fmul.f64(
+ double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check that multiplications of spilled values can use MDB rather than MDBR.
+define double @f7(double *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-SCALAR: mdb %f0, 160(%r15)
+; CHECK: br %r14
+ %ptr1 = getelementptr double, double *%ptr0, i64 2
+ %ptr2 = getelementptr double, double *%ptr0, i64 4
+ %ptr3 = getelementptr double, double *%ptr0, i64 6
+ %ptr4 = getelementptr double, double *%ptr0, i64 8
+ %ptr5 = getelementptr double, double *%ptr0, i64 10
+ %ptr6 = getelementptr double, double *%ptr0, i64 12
+ %ptr7 = getelementptr double, double *%ptr0, i64 14
+ %ptr8 = getelementptr double, double *%ptr0, i64 16
+ %ptr9 = getelementptr double, double *%ptr0, i64 18
+ %ptr10 = getelementptr double, double *%ptr0, i64 20
+
+ %val0 = load double, double *%ptr0
+ %val1 = load double, double *%ptr1
+ %val2 = load double, double *%ptr2
+ %val3 = load double, double *%ptr3
+ %val4 = load double, double *%ptr4
+ %val5 = load double, double *%ptr5
+ %val6 = load double, double *%ptr6
+ %val7 = load double, double *%ptr7
+ %val8 = load double, double *%ptr8
+ %val9 = load double, double *%ptr9
+ %val10 = load double, double *%ptr10
+
+ %ret = call double @foo()
+
+ %mul0 = call double @llvm.experimental.constrained.fmul.f64(
+ double %ret, double %val0,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %mul1 = call double @llvm.experimental.constrained.fmul.f64(
+ double %mul0, double %val1,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %mul2 = call double @llvm.experimental.constrained.fmul.f64(
+ double %mul1, double %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %mul3 = call double @llvm.experimental.constrained.fmul.f64(
+ double %mul2, double %val3,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %mul4 = call double @llvm.experimental.constrained.fmul.f64(
+ double %mul3, double %val4,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %mul5 = call double @llvm.experimental.constrained.fmul.f64(
+ double %mul4, double %val5,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %mul6 = call double @llvm.experimental.constrained.fmul.f64(
+ double %mul5, double %val6,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %mul7 = call double @llvm.experimental.constrained.fmul.f64(
+ double %mul6, double %val7,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %mul8 = call double @llvm.experimental.constrained.fmul.f64(
+ double %mul7, double %val8,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %mul9 = call double @llvm.experimental.constrained.fmul.f64(
+ double %mul8, double %val9,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %mul10 = call double @llvm.experimental.constrained.fmul.f64(
+ double %mul9, double %val10,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+
+ ret double %mul10
+}
--- /dev/null
+; Test strict multiplication of two f64s, producing an f128 result.
+; FIXME: we do not have a strict version of fpext yet
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fmul.f128(fp128, fp128, metadata, metadata)
+
+declare double @foo()
+
+; Check register multiplication. "mxdbr %f0, %f2" is not valid from LLVM's
+; point of view, because %f2 is the low register of the FP128 %f0. Pass the
+; multiplier in %f4 instead.
+define void @f1(double %f1, double %dummy, double %f2, fp128 *%dst) {
+; CHECK-LABEL: f1:
+; CHECK: mxdbr %f0, %f4
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+ %f1x = fpext double %f1 to fp128
+ %f2x = fpext double %f2 to fp128
+ %res = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %f1x, fp128 %f2x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%dst
+ ret void
+}
+
+; Check the low end of the MXDB range.
+define void @f2(double %f1, double *%ptr, fp128 *%dst) {
+; CHECK-LABEL: f2:
+; CHECK: mxdb %f0, 0(%r2)
+; CHECK: std %f0, 0(%r3)
+; CHECK: std %f2, 8(%r3)
+; CHECK: br %r14
+ %f2 = load double, double *%ptr
+ %f1x = fpext double %f1 to fp128
+ %f2x = fpext double %f2 to fp128
+ %res = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %f1x, fp128 %f2x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%dst
+ ret void
+}
+
+; Check the high end of the aligned MXDB range.
+define void @f3(double %f1, double *%base, fp128 *%dst) {
+; CHECK-LABEL: f3:
+; CHECK: mxdb %f0, 4088(%r2)
+; CHECK: std %f0, 0(%r3)
+; CHECK: std %f2, 8(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 511
+ %f2 = load double, double *%ptr
+ %f1x = fpext double %f1 to fp128
+ %f2x = fpext double %f2 to fp128
+ %res = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %f1x, fp128 %f2x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%dst
+ ret void
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define void @f4(double %f1, double *%base, fp128 *%dst) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: mxdb %f0, 0(%r2)
+; CHECK: std %f0, 0(%r3)
+; CHECK: std %f2, 8(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 512
+ %f2 = load double, double *%ptr
+ %f1x = fpext double %f1 to fp128
+ %f2x = fpext double %f2 to fp128
+ %res = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %f1x, fp128 %f2x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%dst
+ ret void
+}
+
+; Check negative displacements, which also need separate address logic.
+define void @f5(double %f1, double *%base, fp128 *%dst) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -8
+; CHECK: mxdb %f0, 0(%r2)
+; CHECK: std %f0, 0(%r3)
+; CHECK: std %f2, 8(%r3)
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 -1
+ %f2 = load double, double *%ptr
+ %f1x = fpext double %f1 to fp128
+ %f2x = fpext double %f2 to fp128
+ %res = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %f1x, fp128 %f2x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%dst
+ ret void
+}
+
+; Check that MXDB allows indices.
+define void @f6(double %f1, double *%base, i64 %index, fp128 *%dst) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: mxdb %f0, 800(%r1,%r2)
+; CHECK: std %f0, 0(%r4)
+; CHECK: std %f2, 8(%r4)
+; CHECK: br %r14
+ %ptr1 = getelementptr double, double *%base, i64 %index
+ %ptr2 = getelementptr double, double *%ptr1, i64 100
+ %f2 = load double, double *%ptr2
+ %f1x = fpext double %f1 to fp128
+ %f2x = fpext double %f2 to fp128
+ %res = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %f1x, fp128 %f2x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%dst
+ ret void
+}
+
+; Check that multiplications of spilled values can use MXDB rather than MXDBR.
+define double @f7(double *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: mxdb %f0, 160(%r15)
+; CHECK: br %r14
+ %ptr1 = getelementptr double, double *%ptr0, i64 2
+ %ptr2 = getelementptr double, double *%ptr0, i64 4
+ %ptr3 = getelementptr double, double *%ptr0, i64 6
+ %ptr4 = getelementptr double, double *%ptr0, i64 8
+ %ptr5 = getelementptr double, double *%ptr0, i64 10
+ %ptr6 = getelementptr double, double *%ptr0, i64 12
+ %ptr7 = getelementptr double, double *%ptr0, i64 14
+ %ptr8 = getelementptr double, double *%ptr0, i64 16
+ %ptr9 = getelementptr double, double *%ptr0, i64 18
+ %ptr10 = getelementptr double, double *%ptr0, i64 20
+
+ %val0 = load double, double *%ptr0
+ %val1 = load double, double *%ptr1
+ %val2 = load double, double *%ptr2
+ %val3 = load double, double *%ptr3
+ %val4 = load double, double *%ptr4
+ %val5 = load double, double *%ptr5
+ %val6 = load double, double *%ptr6
+ %val7 = load double, double *%ptr7
+ %val8 = load double, double *%ptr8
+ %val9 = load double, double *%ptr9
+ %val10 = load double, double *%ptr10
+
+ %frob0 = fadd double %val0, %val0
+ %frob1 = fadd double %val1, %val1
+ %frob2 = fadd double %val2, %val2
+ %frob3 = fadd double %val3, %val3
+ %frob4 = fadd double %val4, %val4
+ %frob5 = fadd double %val5, %val5
+ %frob6 = fadd double %val6, %val6
+ %frob7 = fadd double %val7, %val7
+ %frob8 = fadd double %val8, %val8
+ %frob9 = fadd double %val9, %val9
+ %frob10 = fadd double %val10, %val10
+
+ store double %frob0, double *%ptr0
+ store double %frob1, double *%ptr1
+ store double %frob2, double *%ptr2
+ store double %frob3, double *%ptr3
+ store double %frob4, double *%ptr4
+ store double %frob5, double *%ptr5
+ store double %frob6, double *%ptr6
+ store double %frob7, double *%ptr7
+ store double %frob8, double *%ptr8
+ store double %frob9, double *%ptr9
+ store double %frob10, double *%ptr10
+
+ %ret = call double @foo()
+
+ %accext0 = fpext double %ret to fp128
+ %ext0 = fpext double %frob0 to fp128
+ %mul0 = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %accext0, fp128 %ext0,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %const0 = fpext double 1.01 to fp128
+ %extra0 = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %mul0, fp128 %const0,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %trunc0 = fptrunc fp128 %extra0 to double
+
+ %accext1 = fpext double %trunc0 to fp128
+ %ext1 = fpext double %frob1 to fp128
+ %mul1 = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %accext1, fp128 %ext1,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %const1 = fpext double 1.11 to fp128
+ %extra1 = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %mul1, fp128 %const1,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %trunc1 = fptrunc fp128 %extra1 to double
+
+ %accext2 = fpext double %trunc1 to fp128
+ %ext2 = fpext double %frob2 to fp128
+ %mul2 = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %accext2, fp128 %ext2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %const2 = fpext double 1.21 to fp128
+ %extra2 = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %mul2, fp128 %const2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %trunc2 = fptrunc fp128 %extra2 to double
+
+ %accext3 = fpext double %trunc2 to fp128
+ %ext3 = fpext double %frob3 to fp128
+ %mul3 = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %accext3, fp128 %ext3,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %const3 = fpext double 1.31 to fp128
+ %extra3 = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %mul3, fp128 %const3,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %trunc3 = fptrunc fp128 %extra3 to double
+
+ %accext4 = fpext double %trunc3 to fp128
+ %ext4 = fpext double %frob4 to fp128
+ %mul4 = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %accext4, fp128 %ext4,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %const4 = fpext double 1.41 to fp128
+ %extra4 = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %mul4, fp128 %const4,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %trunc4 = fptrunc fp128 %extra4 to double
+
+ %accext5 = fpext double %trunc4 to fp128
+ %ext5 = fpext double %frob5 to fp128
+ %mul5 = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %accext5, fp128 %ext5,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %const5 = fpext double 1.51 to fp128
+ %extra5 = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %mul5, fp128 %const5,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %trunc5 = fptrunc fp128 %extra5 to double
+
+ %accext6 = fpext double %trunc5 to fp128
+ %ext6 = fpext double %frob6 to fp128
+ %mul6 = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %accext6, fp128 %ext6,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %const6 = fpext double 1.61 to fp128
+ %extra6 = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %mul6, fp128 %const6,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %trunc6 = fptrunc fp128 %extra6 to double
+
+ %accext7 = fpext double %trunc6 to fp128
+ %ext7 = fpext double %frob7 to fp128
+ %mul7 = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %accext7, fp128 %ext7,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %const7 = fpext double 1.71 to fp128
+ %extra7 = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %mul7, fp128 %const7,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %trunc7 = fptrunc fp128 %extra7 to double
+
+ %accext8 = fpext double %trunc7 to fp128
+ %ext8 = fpext double %frob8 to fp128
+ %mul8 = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %accext8, fp128 %ext8,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %const8 = fpext double 1.81 to fp128
+ %extra8 = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %mul8, fp128 %const8,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %trunc8 = fptrunc fp128 %extra8 to double
+
+ %accext9 = fpext double %trunc8 to fp128
+ %ext9 = fpext double %frob9 to fp128
+ %mul9 = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %accext9, fp128 %ext9,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %const9 = fpext double 1.91 to fp128
+ %extra9 = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %mul9, fp128 %const9,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %trunc9 = fptrunc fp128 %extra9 to double
+
+ ret double %trunc9
+}
--- /dev/null
+; Test strict multiplication of two f128s.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fmul.f128(fp128, fp128, metadata, metadata)
+
+; There is no memory form of 128-bit multiplication.
+define void @f1(fp128 *%ptr, float %f2) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: lxebr %f0, %f0
+; CHECK-DAG: ld %f1, 0(%r2)
+; CHECK-DAG: ld %f3, 8(%r2)
+; CHECK: mxbr %f0, %f1
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+ %f1 = load fp128, fp128 *%ptr
+ %f2x = fpext float %f2 to fp128
+ %res = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %f1, fp128 %f2x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%ptr
+ ret void
+}
--- /dev/null
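+; Test strict 32-bit floating-point multiply-and-add.
+;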
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
+
+declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
+
+define float @f1(float %f1, float %f2, float %acc) {
+; CHECK-LABEL: f1:
+; CHECK-SCALAR: maebr %f4, %f0, %f2
+; CHECK-SCALAR: ler %f0, %f4
+; CHECK-VECTOR: wfmasb %f0, %f0, %f2, %f4
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.fma.f32 (
+ float %f1, float %f2, float %acc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define float @f2(float %f1, float *%ptr, float %acc) {
+; CHECK-LABEL: f2:
+; CHECK: maeb %f2, %f0, 0(%r2)
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+ %f2 = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.fma.f32 (
+ float %f1, float %f2, float %acc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define float @f3(float %f1, float *%base, float %acc) {
+; CHECK-LABEL: f3:
+; CHECK: maeb %f2, %f0, 4092(%r2)
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 1023
+ %f2 = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.fma.f32 (
+ float %f1, float %f2, float %acc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define float @f4(float %f1, float *%base, float %acc) {
+; The important thing here is that we don't generate an out-of-range
+; displacement. Other sequences besides this one would be OK.
+;
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: maeb %f2, %f0, 0(%r2)
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 1024
+ %f2 = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.fma.f32 (
+ float %f1, float %f2, float %acc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define float @f5(float %f1, float *%base, float %acc) {
+; Here too the important thing is that we don't generate an out-of-range
+; displacement. Other sequences besides this one would be OK.
+;
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -4
+; CHECK: maeb %f2, %f0, 0(%r2)
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 -1
+ %f2 = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.fma.f32 (
+ float %f1, float %f2, float %acc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define float @f6(float %f1, float *%base, i64 %index, float %acc) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: maeb %f2, %f0, 0(%r1,%r2)
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 %index
+ %f2 = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.fma.f32 (
+ float %f1, float %f2, float %acc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define float @f7(float %f1, float *%base, i64 %index, float %acc) {
+; CHECK-LABEL: f7:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: maeb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}})
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+ %index2 = add i64 %index, 1023
+ %ptr = getelementptr float, float *%base, i64 %index2
+ %f2 = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.fma.f32 (
+ float %f1, float %f2, float %acc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define float @f8(float %f1, float *%base, i64 %index, float %acc) {
+; CHECK-LABEL: f8:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
+; CHECK: maeb %f2, %f0, 0(%r1)
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+ %index2 = add i64 %index, 1024
+ %ptr = getelementptr float, float *%base, i64 %index2
+ %f2 = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.fma.f32 (
+ float %f1, float %f2, float %acc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
--- /dev/null
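+; Test strict 64-bit floating-point multiply-and-add.
+;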
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
+
+declare double @llvm.experimental.constrained.fma.f64(double %f1, double %f2, double %f3, metadata, metadata)
+
+define double @f1(double %f1, double %f2, double %acc) {
+; CHECK-LABEL: f1:
+; CHECK-SCALAR: madbr %f4, %f0, %f2
+; CHECK-SCALAR: ldr %f0, %f4
+; CHECK-VECTOR: wfmadb %f0, %f0, %f2, %f4
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.fma.f64 (
+ double %f1, double %f2, double %acc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define double @f2(double %f1, double *%ptr, double %acc) {
+; CHECK-LABEL: f2:
+; CHECK: madb %f2, %f0, 0(%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+ %f2 = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.fma.f64 (
+ double %f1, double %f2, double %acc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define double @f3(double %f1, double *%base, double %acc) {
+; CHECK-LABEL: f3:
+; CHECK: madb %f2, %f0, 4088(%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 511
+ %f2 = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.fma.f64 (
+ double %f1, double %f2, double %acc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define double @f4(double %f1, double *%base, double %acc) {
+; The important thing here is that we don't generate an out-of-range
+; displacement. Other sequences besides this one would be OK.
+;
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: madb %f2, %f0, 0(%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 512
+ %f2 = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.fma.f64 (
+ double %f1, double %f2, double %acc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define double @f5(double %f1, double *%base, double %acc) {
+; Here too the important thing is that we don't generate an out-of-range
+; displacement. Other sequences besides this one would be OK.
+;
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -8
+; CHECK: madb %f2, %f0, 0(%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 -1
+ %f2 = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.fma.f64 (
+ double %f1, double %f2, double %acc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define double @f6(double %f1, double *%base, i64 %index, double %acc) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: madb %f2, %f0, 0(%r1,%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 %index
+ %f2 = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.fma.f64 (
+ double %f1, double %f2, double %acc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define double @f7(double %f1, double *%base, i64 %index, double %acc) {
+; CHECK-LABEL: f7:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: madb %f2, %f0, 4088({{%r1,%r2|%r2,%r1}})
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+ %index2 = add i64 %index, 511
+ %ptr = getelementptr double, double *%base, i64 %index2
+ %f2 = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.fma.f64 (
+ double %f1, double %f2, double %acc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define double @f8(double %f1, double *%base, i64 %index, double %acc) {
+; CHECK-LABEL: f8:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
+; CHECK: madb %f2, %f0, 0(%r1)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+ %index2 = add i64 %index, 512
+ %ptr = getelementptr double, double *%base, i64 %index2
+ %f2 = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.fma.f64 (
+ double %f1, double %f2, double %acc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
--- /dev/null
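+; Test strict 32-bit floating-point multiply-and-subtract.
+;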
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
+
+declare float @llvm.experimental.constrained.fma.f32(float %f1, float %f2, float %f3, metadata, metadata)
+
+define float @f1(float %f1, float %f2, float %acc) {
+; CHECK-LABEL: f1:
+; CHECK-SCALAR: msebr %f4, %f0, %f2
+; CHECK-SCALAR: ler %f0, %f4
+; CHECK-VECTOR: wfmssb %f0, %f0, %f2, %f4
+; CHECK: br %r14
+ %negacc = fsub float -0.0, %acc
+ %res = call float @llvm.experimental.constrained.fma.f32 (
+ float %f1, float %f2, float %negacc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define float @f2(float %f1, float *%ptr, float %acc) {
+; CHECK-LABEL: f2:
+; CHECK: mseb %f2, %f0, 0(%r2)
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+ %f2 = load float, float *%ptr
+ %negacc = fsub float -0.0, %acc
+ %res = call float @llvm.experimental.constrained.fma.f32 (
+ float %f1, float %f2, float %negacc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define float @f3(float %f1, float *%base, float %acc) {
+; CHECK-LABEL: f3:
+; CHECK: mseb %f2, %f0, 4092(%r2)
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 1023
+ %f2 = load float, float *%ptr
+ %negacc = fsub float -0.0, %acc
+ %res = call float @llvm.experimental.constrained.fma.f32 (
+ float %f1, float %f2, float %negacc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define float @f4(float %f1, float *%base, float %acc) {
+; The important thing here is that we don't generate an out-of-range
+; displacement. Other sequences besides this one would be OK.
+;
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: mseb %f2, %f0, 0(%r2)
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 1024
+ %f2 = load float, float *%ptr
+ %negacc = fsub float -0.0, %acc
+ %res = call float @llvm.experimental.constrained.fma.f32 (
+ float %f1, float %f2, float %negacc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define float @f5(float %f1, float *%base, float %acc) {
+; Here too the important thing is that we don't generate an out-of-range
+; displacement. Other sequences besides this one would be OK.
+;
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -4
+; CHECK: mseb %f2, %f0, 0(%r2)
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 -1
+ %f2 = load float, float *%ptr
+ %negacc = fsub float -0.0, %acc
+ %res = call float @llvm.experimental.constrained.fma.f32 (
+ float %f1, float %f2, float %negacc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define float @f6(float %f1, float *%base, i64 %index, float %acc) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: mseb %f2, %f0, 0(%r1,%r2)
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 %index
+ %f2 = load float, float *%ptr
+ %negacc = fsub float -0.0, %acc
+ %res = call float @llvm.experimental.constrained.fma.f32 (
+ float %f1, float %f2, float %negacc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define float @f7(float %f1, float *%base, i64 %index, float %acc) {
+; CHECK-LABEL: f7:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: mseb %f2, %f0, 4092({{%r1,%r2|%r2,%r1}})
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+ %index2 = add i64 %index, 1023
+ %ptr = getelementptr float, float *%base, i64 %index2
+ %f2 = load float, float *%ptr
+ %negacc = fsub float -0.0, %acc
+ %res = call float @llvm.experimental.constrained.fma.f32 (
+ float %f1, float %f2, float %negacc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define float @f8(float %f1, float *%base, i64 %index, float %acc) {
+; CHECK-LABEL: f8:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
+; CHECK: mseb %f2, %f0, 0(%r1)
+; CHECK-SCALAR: ler %f0, %f2
+; CHECK-VECTOR: ldr %f0, %f2
+; CHECK: br %r14
+ %index2 = add i64 %index, 1024
+ %ptr = getelementptr float, float *%base, i64 %index2
+ %f2 = load float, float *%ptr
+ %negacc = fsub float -0.0, %acc
+ %res = call float @llvm.experimental.constrained.fma.f32 (
+ float %f1, float %f2, float %negacc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
--- /dev/null
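+; Test strict 64-bit floating-point multiply-and-subtract.
+;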
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
+
+declare double @llvm.experimental.constrained.fma.f64(double %f1, double %f2, double %f3, metadata, metadata)
+
+define double @f1(double %f1, double %f2, double %acc) {
+; CHECK-LABEL: f1:
+; CHECK-SCALAR: msdbr %f4, %f0, %f2
+; CHECK-SCALAR: ldr %f0, %f4
+; CHECK-VECTOR: wfmsdb %f0, %f0, %f2, %f4
+; CHECK: br %r14
+ %negacc = fsub double -0.0, %acc
+ %res = call double @llvm.experimental.constrained.fma.f64 (
+ double %f1, double %f2, double %negacc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define double @f2(double %f1, double *%ptr, double %acc) {
+; CHECK-LABEL: f2:
+; CHECK: msdb %f2, %f0, 0(%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+ %f2 = load double, double *%ptr
+ %negacc = fsub double -0.0, %acc
+ %res = call double @llvm.experimental.constrained.fma.f64 (
+ double %f1, double %f2, double %negacc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define double @f3(double %f1, double *%base, double %acc) {
+; CHECK-LABEL: f3:
+; CHECK: msdb %f2, %f0, 4088(%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 511
+ %f2 = load double, double *%ptr
+ %negacc = fsub double -0.0, %acc
+ %res = call double @llvm.experimental.constrained.fma.f64 (
+ double %f1, double %f2, double %negacc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define double @f4(double %f1, double *%base, double %acc) {
+; The important thing here is that we don't generate an out-of-range
+; displacement. Other sequences besides this one would be OK.
+;
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: msdb %f2, %f0, 0(%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 512
+ %f2 = load double, double *%ptr
+ %negacc = fsub double -0.0, %acc
+ %res = call double @llvm.experimental.constrained.fma.f64 (
+ double %f1, double %f2, double %negacc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define double @f5(double %f1, double *%base, double %acc) {
+; Here too the important thing is that we don't generate an out-of-range
+; displacement. Other sequences besides this one would be OK.
+;
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -8
+; CHECK: msdb %f2, %f0, 0(%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 -1
+ %f2 = load double, double *%ptr
+ %negacc = fsub double -0.0, %acc
+ %res = call double @llvm.experimental.constrained.fma.f64 (
+ double %f1, double %f2, double %negacc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define double @f6(double %f1, double *%base, i64 %index, double %acc) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: msdb %f2, %f0, 0(%r1,%r2)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 %index
+ %f2 = load double, double *%ptr
+ %negacc = fsub double -0.0, %acc
+ %res = call double @llvm.experimental.constrained.fma.f64 (
+ double %f1, double %f2, double %negacc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define double @f7(double %f1, double *%base, i64 %index, double %acc) {
+; CHECK-LABEL: f7:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: msdb %f2, %f0, 4088({{%r1,%r2|%r2,%r1}})
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+ %index2 = add i64 %index, 511
+ %ptr = getelementptr double, double *%base, i64 %index2
+ %f2 = load double, double *%ptr
+ %negacc = fsub double -0.0, %acc
+ %res = call double @llvm.experimental.constrained.fma.f64 (
+ double %f1, double %f2, double %negacc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define double @f8(double %f1, double *%base, i64 %index, double %acc) {
+; CHECK-LABEL: f8:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: lay %r1, 4096({{%r1,%r2|%r2,%r1}})
+; CHECK: msdb %f2, %f0, 0(%r1)
+; CHECK: ldr %f0, %f2
+; CHECK: br %r14
+ %index2 = add i64 %index, 512
+ %ptr = getelementptr double, double *%base, i64 %index2
+ %f2 = load double, double *%ptr
+ %negacc = fsub double -0.0, %acc
+ %res = call double @llvm.experimental.constrained.fma.f64 (
+ double %f1, double %f2, double %negacc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
--- /dev/null
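+; Test strict negated multiply-and-add and multiply-and-subtract on z14.
+;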
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare double @llvm.experimental.constrained.fma.f64(double %f1, double %f2, double %f3, metadata, metadata)
+declare float @llvm.experimental.constrained.fma.f32(float %f1, float %f2, float %f3, metadata, metadata)
+
+define double @f1(double %f1, double %f2, double %acc) {
+; CHECK-LABEL: f1:
+; CHECK: wfnmadb %f0, %f0, %f2, %f4
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.fma.f64 (
+ double %f1, double %f2, double %acc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %negres = fsub double -0.0, %res
+ ret double %negres
+}
+
+define double @f2(double %f1, double %f2, double %acc) {
+; CHECK-LABEL: f2:
+; CHECK: wfnmsdb %f0, %f0, %f2, %f4
+; CHECK: br %r14
+ %negacc = fsub double -0.0, %acc
+ %res = call double @llvm.experimental.constrained.fma.f64 (
+ double %f1, double %f2, double %negacc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %negres = fsub double -0.0, %res
+ ret double %negres
+}
+
+define float @f3(float %f1, float %f2, float %acc) {
+; CHECK-LABEL: f3:
+; CHECK: wfnmasb %f0, %f0, %f2, %f4
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.fma.f32 (
+ float %f1, float %f2, float %acc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %negres = fsub float -0.0, %res
+ ret float %negres
+}
+
+define float @f4(float %f1, float %f2, float %acc) {
+; CHECK-LABEL: f4:
+; CHECK: wfnmssb %f0, %f0, %f2, %f4
+; CHECK: br %r14
+ %negacc = fsub float -0.0, %acc
+ %res = call float @llvm.experimental.constrained.fma.f32 (
+ float %f1, float %f2, float %negacc,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %negres = fsub float -0.0, %res
+ ret float %negres
+}
+
--- /dev/null
+; Test strict 128-bit floating-point multiplication on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fmul.f128(fp128, fp128, metadata, metadata)
+
+define void @f1(fp128 *%ptr1, fp128 *%ptr2) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
+; CHECK: wfmxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+ %f1 = load fp128, fp128 *%ptr1
+ %f2 = load fp128, fp128 *%ptr2
+ %res = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %f1, fp128 %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%ptr1
+ ret void
+}
+
+define void @f2(double %f1, double %f2, fp128 *%dst) {
+; CHECK-LABEL: f2:
+; CHECK-DAG: wflld [[REG1:%v[0-9]+]], %f0
+; CHECK-DAG: wflld [[REG2:%v[0-9]+]], %f2
+; CHECK: wfmxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+ %f1x = fpext double %f1 to fp128
+ %f2x = fpext double %f2 to fp128
+ %res = call fp128 @llvm.experimental.constrained.fmul.f128(
+ fp128 %f1x, fp128 %f2x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%dst
+ ret void
+}
+
--- /dev/null
+; Test strict rounding functions for z10.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+
+; Test rint for f32.
+declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata)
+define float @f1(float %f) {
+; CHECK-LABEL: f1:
+; CHECK: fiebr %f0, 0, %f0
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.rint.f32(
+ float %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Test rint for f64.
+declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
+define double @f2(double %f) {
+; CHECK-LABEL: f2:
+; CHECK: fidbr %f0, 0, %f0
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.rint.f64(
+ double %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Test rint for f128.
+declare fp128 @llvm.experimental.constrained.rint.f128(fp128, metadata, metadata)
+define void @f3(fp128 *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: fixbr %f0, 0, %f0
+; CHECK: br %r14
+ %src = load fp128, fp128 *%ptr
+ %res = call fp128 @llvm.experimental.constrained.rint.f128(
+ fp128 %src,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%ptr
+ ret void
+}
+
+; Test nearbyint for f32.
+declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata)
+define float @f4(float %f) {
+; CHECK-LABEL: f4:
+; CHECK: brasl %r14, nearbyintf@PLT
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.nearbyint.f32(
+ float %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Test nearbyint for f64.
+declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
+define double @f5(double %f) {
+; CHECK-LABEL: f5:
+; CHECK: brasl %r14, nearbyint@PLT
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.nearbyint.f64(
+ double %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Test nearbyint for f128.
+declare fp128 @llvm.experimental.constrained.nearbyint.f128(fp128, metadata, metadata)
+define void @f6(fp128 *%ptr) {
+; CHECK-LABEL: f6:
+; CHECK: brasl %r14, nearbyintl@PLT
+; CHECK: br %r14
+ %src = load fp128, fp128 *%ptr
+ %res = call fp128 @llvm.experimental.constrained.nearbyint.f128(
+ fp128 %src,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%ptr
+ ret void
+}
+
+; Test floor for f32.
+declare float @llvm.experimental.constrained.floor.f32(float, metadata, metadata)
+define float @f7(float %f) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, floorf@PLT
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.floor.f32(
+ float %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Test floor for f64.
+declare double @llvm.experimental.constrained.floor.f64(double, metadata, metadata)
+define double @f8(double %f) {
+; CHECK-LABEL: f8:
+; CHECK: brasl %r14, floor@PLT
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.floor.f64(
+ double %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Test floor for f128.
+declare fp128 @llvm.experimental.constrained.floor.f128(fp128, metadata, metadata)
+define void @f9(fp128 *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK: brasl %r14, floorl@PLT
+; CHECK: br %r14
+ %src = load fp128, fp128 *%ptr
+ %res = call fp128 @llvm.experimental.constrained.floor.f128(
+ fp128 %src,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%ptr
+ ret void
+}
+
+; Test ceil for f32.
+declare float @llvm.experimental.constrained.ceil.f32(float, metadata, metadata)
+define float @f10(float %f) {
+; CHECK-LABEL: f10:
+; CHECK: brasl %r14, ceilf@PLT
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.ceil.f32(
+ float %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Test ceil for f64.
+declare double @llvm.experimental.constrained.ceil.f64(double, metadata, metadata)
+define double @f11(double %f) {
+; CHECK-LABEL: f11:
+; CHECK: brasl %r14, ceil@PLT
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.ceil.f64(
+ double %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Test ceil for f128.
+declare fp128 @llvm.experimental.constrained.ceil.f128(fp128, metadata, metadata)
+define void @f12(fp128 *%ptr) {
+; CHECK-LABEL: f12:
+; CHECK: brasl %r14, ceill@PLT
+; CHECK: br %r14
+ %src = load fp128, fp128 *%ptr
+ %res = call fp128 @llvm.experimental.constrained.ceil.f128(
+ fp128 %src,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%ptr
+ ret void
+}
+
+; Test trunc for f32.
+declare float @llvm.experimental.constrained.trunc.f32(float, metadata, metadata)
+define float @f13(float %f) {
+; CHECK-LABEL: f13:
+; CHECK: brasl %r14, truncf@PLT
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.trunc.f32(
+ float %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Test trunc for f64.
+declare double @llvm.experimental.constrained.trunc.f64(double, metadata, metadata)
+define double @f14(double %f) {
+; CHECK-LABEL: f14:
+; CHECK: brasl %r14, trunc@PLT
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.trunc.f64(
+ double %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Test trunc for f128.
+declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata, metadata)
+define void @f15(fp128 *%ptr) {
+; CHECK-LABEL: f15:
+; CHECK: brasl %r14, truncl@PLT
+; CHECK: br %r14
+ %src = load fp128, fp128 *%ptr
+ %res = call fp128 @llvm.experimental.constrained.trunc.f128(
+ fp128 %src,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%ptr
+ ret void
+}
+
+; Test round for f32.
+declare float @llvm.experimental.constrained.round.f32(float, metadata, metadata)
+define float @f16(float %f) {
+; CHECK-LABEL: f16:
+; CHECK: brasl %r14, roundf@PLT
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.round.f32(
+ float %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Test round for f64.
+declare double @llvm.experimental.constrained.round.f64(double, metadata, metadata)
+define double @f17(double %f) {
+; CHECK-LABEL: f17:
+; CHECK: brasl %r14, round@PLT
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.round.f64(
+ double %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Test round for f128.
+declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata, metadata)
+define void @f18(fp128 *%ptr) {
+; CHECK-LABEL: f18:
+; CHECK: brasl %r14, roundl@PLT
+; CHECK: br %r14
+ %src = load fp128, fp128 *%ptr
+ %res = call fp128 @llvm.experimental.constrained.round.f128(
+ fp128 %src,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%ptr
+ ret void
+}
+
--- /dev/null
+; Test strict rounding functions for z196 and above.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
+
+; Test rint for f32.
+declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata)
+define float @f1(float %f) {
+; CHECK-LABEL: f1:
+; CHECK: fiebr %f0, 0, %f0
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.rint.f32(
+ float %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Test rint for f64.
+declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
+define double @f2(double %f) {
+; CHECK-LABEL: f2:
+; CHECK-SCALAR: fidbr %f0, 0, %f0
+; CHECK-VECTOR: fidbra %f0, 0, %f0, 0
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.rint.f64(
+ double %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Test rint for f128.
+declare fp128 @llvm.experimental.constrained.rint.f128(fp128, metadata, metadata)
+define void @f3(fp128 *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: fixbr %f0, 0, %f0
+; CHECK: br %r14
+ %src = load fp128, fp128 *%ptr
+ %res = call fp128 @llvm.experimental.constrained.rint.f128(
+ fp128 %src,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%ptr
+ ret void
+}
+
+; Test nearbyint for f32.
+declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata)
+define float @f4(float %f) {
+; CHECK-LABEL: f4:
+; CHECK: fiebra %f0, 0, %f0, 4
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.nearbyint.f32(
+ float %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Test nearbyint for f64.
+declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
+define double @f5(double %f) {
+; CHECK-LABEL: f5:
+; CHECK: fidbra %f0, 0, %f0, 4
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.nearbyint.f64(
+ double %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Test nearbyint for f128.
+declare fp128 @llvm.experimental.constrained.nearbyint.f128(fp128, metadata, metadata)
+define void @f6(fp128 *%ptr) {
+; CHECK-LABEL: f6:
+; CHECK: fixbra %f0, 0, %f0, 4
+; CHECK: br %r14
+ %src = load fp128, fp128 *%ptr
+ %res = call fp128 @llvm.experimental.constrained.nearbyint.f128(
+ fp128 %src,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%ptr
+ ret void
+}
+
+; Test floor for f32.
+declare float @llvm.experimental.constrained.floor.f32(float, metadata, metadata)
+define float @f7(float %f) {
+; CHECK-LABEL: f7:
+; CHECK: fiebra %f0, 7, %f0, 4
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.floor.f32(
+ float %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Test floor for f64.
+declare double @llvm.experimental.constrained.floor.f64(double, metadata, metadata)
+define double @f8(double %f) {
+; CHECK-LABEL: f8:
+; CHECK: fidbra %f0, 7, %f0, 4
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.floor.f64(
+ double %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Test floor for f128.
+declare fp128 @llvm.experimental.constrained.floor.f128(fp128, metadata, metadata)
+define void @f9(fp128 *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK: fixbra %f0, 7, %f0, 4
+; CHECK: br %r14
+ %src = load fp128, fp128 *%ptr
+ %res = call fp128 @llvm.experimental.constrained.floor.f128(
+ fp128 %src,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%ptr
+ ret void
+}
+
+; Test ceil for f32.
+declare float @llvm.experimental.constrained.ceil.f32(float, metadata, metadata)
+define float @f10(float %f) {
+; CHECK-LABEL: f10:
+; CHECK: fiebra %f0, 6, %f0, 4
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.ceil.f32(
+ float %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Test ceil for f64.
+declare double @llvm.experimental.constrained.ceil.f64(double, metadata, metadata)
+define double @f11(double %f) {
+; CHECK-LABEL: f11:
+; CHECK: fidbra %f0, 6, %f0, 4
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.ceil.f64(
+ double %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Test ceil for f128.
+declare fp128 @llvm.experimental.constrained.ceil.f128(fp128, metadata, metadata)
+define void @f12(fp128 *%ptr) {
+; CHECK-LABEL: f12:
+; CHECK: fixbra %f0, 6, %f0, 4
+; CHECK: br %r14
+ %src = load fp128, fp128 *%ptr
+ %res = call fp128 @llvm.experimental.constrained.ceil.f128(
+ fp128 %src,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%ptr
+ ret void
+}
+
+; Test trunc for f32.
+declare float @llvm.experimental.constrained.trunc.f32(float, metadata, metadata)
+define float @f13(float %f) {
+; CHECK-LABEL: f13:
+; CHECK: fiebra %f0, 5, %f0, 4
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.trunc.f32(
+ float %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Test trunc for f64.
+declare double @llvm.experimental.constrained.trunc.f64(double, metadata, metadata)
+define double @f14(double %f) {
+; CHECK-LABEL: f14:
+; CHECK: fidbra %f0, 5, %f0, 4
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.trunc.f64(
+ double %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Test trunc for f128.
+declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata, metadata)
+define void @f15(fp128 *%ptr) {
+; CHECK-LABEL: f15:
+; CHECK: fixbra %f0, 5, %f0, 4
+; CHECK: br %r14
+ %src = load fp128, fp128 *%ptr
+ %res = call fp128 @llvm.experimental.constrained.trunc.f128(
+ fp128 %src,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%ptr
+ ret void
+}
+
+; Test round for f32.
+declare float @llvm.experimental.constrained.round.f32(float, metadata, metadata)
+define float @f16(float %f) {
+; CHECK-LABEL: f16:
+; CHECK: fiebra %f0, 1, %f0, 4
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.round.f32(
+ float %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Test round for f64.
+declare double @llvm.experimental.constrained.round.f64(double, metadata, metadata)
+define double @f17(double %f) {
+; CHECK-LABEL: f17:
+; CHECK: fidbra %f0, 1, %f0, 4
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.round.f64(
+ double %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Test round for f128.
+declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata, metadata)
+define void @f18(fp128 *%ptr) {
+; CHECK-LABEL: f18:
+; CHECK: fixbra %f0, 1, %f0, 4
+; CHECK: br %r14
+ %src = load fp128, fp128 *%ptr
+ %res = call fp128 @llvm.experimental.constrained.round.f128(
+ fp128 %src,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%ptr
+ ret void
+}
+
--- /dev/null
+; Test strict rounding functions for z14 and above.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+; Test rint for f32.
+declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata)
+define float @f1(float %f) {
+; CHECK-LABEL: f1:
+; CHECK: fiebra %f0, 0, %f0, 0
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.rint.f32(
+ float %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Test rint for f64.
+declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
+define double @f2(double %f) {
+; CHECK-LABEL: f2:
+; CHECK: fidbra %f0, 0, %f0, 0
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.rint.f64(
+ double %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Test rint for f128.
+declare fp128 @llvm.experimental.constrained.rint.f128(fp128, metadata, metadata)
+define void @f3(fp128 *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: vl [[REG:%v[0-9]+]], 0(%r2)
+; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 0, 0
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+ %src = load fp128, fp128 *%ptr
+ %res = call fp128 @llvm.experimental.constrained.rint.f128(
+ fp128 %src,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%ptr
+ ret void
+}
+
+; Test nearbyint for f32.
+declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata)
+define float @f4(float %f) {
+; CHECK-LABEL: f4:
+; CHECK: fiebra %f0, 0, %f0, 4
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.nearbyint.f32(
+ float %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Test nearbyint for f64.
+declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
+define double @f5(double %f) {
+; CHECK-LABEL: f5:
+; CHECK: fidbra %f0, 0, %f0, 4
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.nearbyint.f64(
+ double %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Test nearbyint for f128.
+declare fp128 @llvm.experimental.constrained.nearbyint.f128(fp128, metadata, metadata)
+define void @f6(fp128 *%ptr) {
+; CHECK-LABEL: f6:
+; CHECK: vl [[REG:%v[0-9]+]], 0(%r2)
+; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 0
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+ %src = load fp128, fp128 *%ptr
+ %res = call fp128 @llvm.experimental.constrained.nearbyint.f128(
+ fp128 %src,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%ptr
+ ret void
+}
+
+; Test floor for f32.
+declare float @llvm.experimental.constrained.floor.f32(float, metadata, metadata)
+define float @f7(float %f) {
+; CHECK-LABEL: f7:
+; CHECK: fiebra %f0, 7, %f0, 4
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.floor.f32(
+ float %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Test floor for f64.
+declare double @llvm.experimental.constrained.floor.f64(double, metadata, metadata)
+define double @f8(double %f) {
+; CHECK-LABEL: f8:
+; CHECK: fidbra %f0, 7, %f0, 4
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.floor.f64(
+ double %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Test floor for f128.
+declare fp128 @llvm.experimental.constrained.floor.f128(fp128, metadata, metadata)
+define void @f9(fp128 *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK: vl [[REG:%v[0-9]+]], 0(%r2)
+; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 7
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+ %src = load fp128, fp128 *%ptr
+ %res = call fp128 @llvm.experimental.constrained.floor.f128(
+ fp128 %src,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%ptr
+ ret void
+}
+
+; Test ceil for f32.
+declare float @llvm.experimental.constrained.ceil.f32(float, metadata, metadata)
+define float @f10(float %f) {
+; CHECK-LABEL: f10:
+; CHECK: fiebra %f0, 6, %f0, 4
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.ceil.f32(
+ float %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Test ceil for f64.
+declare double @llvm.experimental.constrained.ceil.f64(double, metadata, metadata)
+define double @f11(double %f) {
+; CHECK-LABEL: f11:
+; CHECK: fidbra %f0, 6, %f0, 4
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.ceil.f64(
+ double %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Test ceil for f128.
+declare fp128 @llvm.experimental.constrained.ceil.f128(fp128, metadata, metadata)
+define void @f12(fp128 *%ptr) {
+; CHECK-LABEL: f12:
+; CHECK: vl [[REG:%v[0-9]+]], 0(%r2)
+; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 6
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+ %src = load fp128, fp128 *%ptr
+ %res = call fp128 @llvm.experimental.constrained.ceil.f128(
+ fp128 %src,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%ptr
+ ret void
+}
+
+; Test trunc for f32.
+declare float @llvm.experimental.constrained.trunc.f32(float, metadata, metadata)
+define float @f13(float %f) {
+; CHECK-LABEL: f13:
+; CHECK: fiebra %f0, 5, %f0, 4
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.trunc.f32(
+ float %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Test trunc for f64.
+declare double @llvm.experimental.constrained.trunc.f64(double, metadata, metadata)
+define double @f14(double %f) {
+; CHECK-LABEL: f14:
+; CHECK: fidbra %f0, 5, %f0, 4
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.trunc.f64(
+ double %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Test trunc for f128.
+declare fp128 @llvm.experimental.constrained.trunc.f128(fp128, metadata, metadata)
+define void @f15(fp128 *%ptr) {
+; CHECK-LABEL: f15:
+; CHECK: vl [[REG:%v[0-9]+]], 0(%r2)
+; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 5
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+ %src = load fp128, fp128 *%ptr
+ %res = call fp128 @llvm.experimental.constrained.trunc.f128(
+ fp128 %src,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%ptr
+ ret void
+}
+
+; Test round for f32.
+declare float @llvm.experimental.constrained.round.f32(float, metadata, metadata)
+define float @f16(float %f) {
+; CHECK-LABEL: f16:
+; CHECK: fiebra %f0, 1, %f0, 4
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.round.f32(
+ float %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Test round for f64.
+declare double @llvm.experimental.constrained.round.f64(double, metadata, metadata)
+define double @f17(double %f) {
+; CHECK-LABEL: f17:
+; CHECK: fidbra %f0, 1, %f0, 4
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.round.f64(
+ double %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Test round for f128.
+declare fp128 @llvm.experimental.constrained.round.f128(fp128, metadata, metadata)
+define void @f18(fp128 *%ptr) {
+; CHECK-LABEL: f18:
+; CHECK: vl [[REG:%v[0-9]+]], 0(%r2)
+; CHECK: wfixb [[RES:%v[0-9]+]], [[REG]], 4, 1
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+ %src = load fp128, fp128 *%ptr
+ %res = call fp128 @llvm.experimental.constrained.round.f128(
+ fp128 %src,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%ptr
+ ret void
+}
+
--- /dev/null
+; Test strict 32-bit square root.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
+
+; Check register square root.
+define float @f1(float %val) {
+; CHECK-LABEL: f1:
+; CHECK: sqebr %f0, %f0
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.sqrt.f32(
+ float %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check the low end of the SQEB range.
+define float @f2(float *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: sqeb %f0, 0(%r2)
+; CHECK: br %r14
+ %val = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.sqrt.f32(
+ float %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check the high end of the aligned SQEB range.
+define float @f3(float *%base) {
+; CHECK-LABEL: f3:
+; CHECK: sqeb %f0, 4092(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 1023
+ %val = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.sqrt.f32(
+ float %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define float @f4(float *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: sqeb %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 1024
+ %val = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.sqrt.f32(
+ float %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define float @f5(float *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -4
+; CHECK: sqeb %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 -1
+ %val = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.sqrt.f32(
+ float %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check that SQEB allows indices.
+define float @f6(float *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: sqeb %f0, 400(%r1,%r2)
+; CHECK: br %r14
+ %ptr1 = getelementptr float, float *%base, i64 %index
+ %ptr2 = getelementptr float, float *%ptr1, i64 100
+ %val = load float, float *%ptr2
+ %res = call float @llvm.experimental.constrained.sqrt.f32(
+ float %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
--- /dev/null
+; Test strict 64-bit square root.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
+
+; Check register square root.
+define double @f1(double %val) {
+; CHECK-LABEL: f1:
+; CHECK: sqdbr %f0, %f0
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.sqrt.f64(
+ double %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check the low end of the SQDB range.
+define double @f2(double *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: sqdb %f0, 0(%r2)
+; CHECK: br %r14
+ %val = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.sqrt.f64(
+ double %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check the high end of the aligned SQDB range.
+define double @f3(double *%base) {
+; CHECK-LABEL: f3:
+; CHECK: sqdb %f0, 4088(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 511
+ %val = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.sqrt.f64(
+ double %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f4(double *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: sqdb %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 512
+ %val = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.sqrt.f64(
+ double %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define double @f5(double *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -8
+; CHECK: sqdb %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 -1
+ %val = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.sqrt.f64(
+ double %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check that SQDB allows indices.
+define double @f6(double *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: sqdb %f0, 800(%r1,%r2)
+; CHECK: br %r14
+ %ptr1 = getelementptr double, double *%base, i64 %index
+ %ptr2 = getelementptr double, double *%ptr1, i64 100
+ %val = load double, double *%ptr2
+ %res = call double @llvm.experimental.constrained.sqrt.f64(
+ double %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
--- /dev/null
+; Test strict 128-bit square root.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.sqrt.f128(fp128, metadata, metadata)
+
+; There's no memory form of SQXBR.
+define void @f1(fp128 *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: ld %f0, 0(%r2)
+; CHECK: ld %f2, 8(%r2)
+; CHECK: sqxbr %f0, %f0
+; CHECK: std %f0, 0(%r2)
+; CHECK: std %f2, 8(%r2)
+; CHECK: br %r14
+ %orig = load fp128, fp128 *%ptr
+ %sqrt = call fp128 @llvm.experimental.constrained.sqrt.f128(
+ fp128 %orig,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %sqrt, fp128 *%ptr
+ ret void
+}
--- /dev/null
+; Test strict 128-bit floating-point square root on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.sqrt.f128(fp128, metadata, metadata)
+
+define void @f1(fp128 *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: vl [[REG:%v[0-9]+]], 0(%r2)
+; CHECK: wfsqxb [[RES:%v[0-9]+]], [[REG]]
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+ %f = load fp128, fp128 *%ptr
+ %res = call fp128 @llvm.experimental.constrained.sqrt.f128(
+ fp128 %f,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128 *%ptr
+ ret void
+}
--- /dev/null
+; Test strict 32-bit floating-point subtraction.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @foo()
+declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
+
+; Check register subtraction.
+define float @f1(float %f1, float %f2) {
+; CHECK-LABEL: f1:
+; CHECK: sebr %f0, %f2
+; CHECK: br %r14
+ %res = call float @llvm.experimental.constrained.fsub.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check the low end of the SEB range.
+define float @f2(float %f1, float *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: seb %f0, 0(%r2)
+; CHECK: br %r14
+ %f2 = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.fsub.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check the high end of the aligned SEB range.
+define float @f3(float %f1, float *%base) {
+; CHECK-LABEL: f3:
+; CHECK: seb %f0, 4092(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 1023
+ %f2 = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.fsub.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define float @f4(float %f1, float *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: seb %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 1024
+ %f2 = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.fsub.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define float @f5(float %f1, float *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -4
+; CHECK: seb %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr float, float *%base, i64 -1
+ %f2 = load float, float *%ptr
+ %res = call float @llvm.experimental.constrained.fsub.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check that SEB allows indices.
+define float @f6(float %f1, float *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 2
+; CHECK: seb %f0, 400(%r1,%r2)
+; CHECK: br %r14
+ %ptr1 = getelementptr float, float *%base, i64 %index
+ %ptr2 = getelementptr float, float *%ptr1, i64 100
+ %f2 = load float, float *%ptr2
+ %res = call float @llvm.experimental.constrained.fsub.f32(
+ float %f1, float %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+; Check that subtractions of spilled values can use SEB rather than SEBR.
+define float @f7(float *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-SCALAR: seb %f0, 16{{[04]}}(%r15)
+; CHECK: br %r14
+ %ptr1 = getelementptr float, float *%ptr0, i64 2
+ %ptr2 = getelementptr float, float *%ptr0, i64 4
+ %ptr3 = getelementptr float, float *%ptr0, i64 6
+ %ptr4 = getelementptr float, float *%ptr0, i64 8
+ %ptr5 = getelementptr float, float *%ptr0, i64 10
+ %ptr6 = getelementptr float, float *%ptr0, i64 12
+ %ptr7 = getelementptr float, float *%ptr0, i64 14
+ %ptr8 = getelementptr float, float *%ptr0, i64 16
+ %ptr9 = getelementptr float, float *%ptr0, i64 18
+ %ptr10 = getelementptr float, float *%ptr0, i64 20
+
+ %val0 = load float, float *%ptr0
+ %val1 = load float, float *%ptr1
+ %val2 = load float, float *%ptr2
+ %val3 = load float, float *%ptr3
+ %val4 = load float, float *%ptr4
+ %val5 = load float, float *%ptr5
+ %val6 = load float, float *%ptr6
+ %val7 = load float, float *%ptr7
+ %val8 = load float, float *%ptr8
+ %val9 = load float, float *%ptr9
+ %val10 = load float, float *%ptr10
+
+ %ret = call float @foo()
+
+ %sub0 = call float @llvm.experimental.constrained.fsub.f32(
+ float %ret, float %val0,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %sub1 = call float @llvm.experimental.constrained.fsub.f32(
+ float %sub0, float %val1,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %sub2 = call float @llvm.experimental.constrained.fsub.f32(
+ float %sub1, float %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %sub3 = call float @llvm.experimental.constrained.fsub.f32(
+ float %sub2, float %val3,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %sub4 = call float @llvm.experimental.constrained.fsub.f32(
+ float %sub3, float %val4,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %sub5 = call float @llvm.experimental.constrained.fsub.f32(
+ float %sub4, float %val5,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %sub6 = call float @llvm.experimental.constrained.fsub.f32(
+ float %sub5, float %val6,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %sub7 = call float @llvm.experimental.constrained.fsub.f32(
+ float %sub6, float %val7,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %sub8 = call float @llvm.experimental.constrained.fsub.f32(
+ float %sub7, float %val8,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %sub9 = call float @llvm.experimental.constrained.fsub.f32(
+ float %sub8, float %val9,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %sub10 = call float @llvm.experimental.constrained.fsub.f32(
+ float %sub9, float %val10,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+
+ ret float %sub10
+}
--- /dev/null
+; Test strict 64-bit floating-point subtraction.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare double @foo()
+declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
+
+; Check register subtraction.
+define double @f1(double %f1, double %f2) {
+; CHECK-LABEL: f1:
+; CHECK: sdbr %f0, %f2
+; CHECK: br %r14
+ %res = call double @llvm.experimental.constrained.fsub.f64(
+ double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check the low end of the SDB range.
+define double @f2(double %f1, double *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: sdb %f0, 0(%r2)
+; CHECK: br %r14
+ %f2 = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.fsub.f64(
+ double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check the high end of the aligned SDB range.
+define double @f3(double %f1, double *%base) {
+; CHECK-LABEL: f3:
+; CHECK: sdb %f0, 4088(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 511
+ %f2 = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.fsub.f64(
+ double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check the next doubleword up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define double @f4(double %f1, double *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: sdb %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 512
+ %f2 = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.fsub.f64(
+ double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check negative displacements, which also need separate address logic.
+define double @f5(double %f1, double *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, -8
+; CHECK: sdb %f0, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr double, double *%base, i64 -1
+ %f2 = load double, double *%ptr
+ %res = call double @llvm.experimental.constrained.fsub.f64(
+ double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check that SDB allows indices.
+define double @f6(double %f1, double *%base, i64 %index) {
+; CHECK-LABEL: f6:
+; CHECK: sllg %r1, %r3, 3
+; CHECK: sdb %f0, 800(%r1,%r2)
+; CHECK: br %r14
+ %ptr1 = getelementptr double, double *%base, i64 %index
+ %ptr2 = getelementptr double, double *%ptr1, i64 100
+ %f2 = load double, double *%ptr2
+ %res = call double @llvm.experimental.constrained.fsub.f64(
+ double %f1, double %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+; Check that subtractions of spilled values can use SDB rather than SDBR.
+define double @f7(double *%ptr0) {
+; CHECK-LABEL: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-SCALAR: sdb %f0, 16{{[04]}}(%r15)
+; CHECK: br %r14
+ %ptr1 = getelementptr double, double *%ptr0, i64 2
+ %ptr2 = getelementptr double, double *%ptr0, i64 4
+ %ptr3 = getelementptr double, double *%ptr0, i64 6
+ %ptr4 = getelementptr double, double *%ptr0, i64 8
+ %ptr5 = getelementptr double, double *%ptr0, i64 10
+ %ptr6 = getelementptr double, double *%ptr0, i64 12
+ %ptr7 = getelementptr double, double *%ptr0, i64 14
+ %ptr8 = getelementptr double, double *%ptr0, i64 16
+ %ptr9 = getelementptr double, double *%ptr0, i64 18
+ %ptr10 = getelementptr double, double *%ptr0, i64 20
+
+ %val0 = load double, double *%ptr0
+ %val1 = load double, double *%ptr1
+ %val2 = load double, double *%ptr2
+ %val3 = load double, double *%ptr3
+ %val4 = load double, double *%ptr4
+ %val5 = load double, double *%ptr5
+ %val6 = load double, double *%ptr6
+ %val7 = load double, double *%ptr7
+ %val8 = load double, double *%ptr8
+ %val9 = load double, double *%ptr9
+ %val10 = load double, double *%ptr10
+
+ %ret = call double @foo()
+
+ %sub0 = call double @llvm.experimental.constrained.fsub.f64(
+ double %ret, double %val0,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %sub1 = call double @llvm.experimental.constrained.fsub.f64(
+ double %sub0, double %val1,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %sub2 = call double @llvm.experimental.constrained.fsub.f64(
+ double %sub1, double %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %sub3 = call double @llvm.experimental.constrained.fsub.f64(
+ double %sub2, double %val3,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %sub4 = call double @llvm.experimental.constrained.fsub.f64(
+ double %sub3, double %val4,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %sub5 = call double @llvm.experimental.constrained.fsub.f64(
+ double %sub4, double %val5,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %sub6 = call double @llvm.experimental.constrained.fsub.f64(
+ double %sub5, double %val6,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %sub7 = call double @llvm.experimental.constrained.fsub.f64(
+ double %sub6, double %val7,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %sub8 = call double @llvm.experimental.constrained.fsub.f64(
+ double %sub7, double %val8,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %sub9 = call double @llvm.experimental.constrained.fsub.f64(
+ double %sub8, double %val9,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %sub10 = call double @llvm.experimental.constrained.fsub.f64(
+ double %sub9, double %val10,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+
+ ret double %sub10
+}
--- /dev/null
+; Test strict 128-bit floating-point subtraction.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fsub.f128(fp128, fp128, metadata, metadata)
+
+; There is no memory form of 128-bit subtraction.
+define void @f1(fp128 *%ptr, float %f2) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: lxebr %f0, %f0
+; CHECK-DAG: ld %f1, 0(%r2)
+; CHECK-DAG: ld %f3, 8(%r2)
+; CHECK: sxbr %f1, %f0
+; CHECK: std %f1, 0(%r2)
+; CHECK: std %f3, 8(%r2)
+; CHECK: br %r14
+ %f1 = load fp128, fp128 *%ptr
+ %f2x = fpext float %f2 to fp128
+ %sum = call fp128 @llvm.experimental.constrained.fsub.f128(
+ fp128 %f1, fp128 %f2x,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %sum, fp128 *%ptr
+ ret void
+}
--- /dev/null
+; Test strict 128-bit floating-point subtraction on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare fp128 @llvm.experimental.constrained.fsub.f128(fp128, fp128, metadata, metadata)
+
+define void @f1(fp128 *%ptr1, fp128 *%ptr2) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
+; CHECK: wfsxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]]
+; CHECK: vst [[RES]], 0(%r2)
+; CHECK: br %r14
+ %f1 = load fp128, fp128 *%ptr1
+ %f2 = load fp128, fp128 *%ptr2
+ %sum = call fp128 @llvm.experimental.constrained.fsub.f128(
+ fp128 %f1, fp128 %f2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %sum, fp128 *%ptr1
+ ret void
+}
--- /dev/null
+; Test strict vector addition.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+
+; Test a v2f64 addition.
+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
+ <2 x double> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vfadb %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <2 x double> %ret
+}
+
+; Test an f64 addition that uses vector registers.
+define double @f6(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: wfadb %f0, %v24, %v26
+; CHECK: br %r14
+ %scalar1 = extractelement <2 x double> %val1, i32 0
+ %scalar2 = extractelement <2 x double> %val2, i32 0
+ %ret = call double @llvm.experimental.constrained.fadd.f64(
+ double %scalar1, double %scalar2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %ret
+}
--- /dev/null
+; Test strict vector addition on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+
+; Test a v4f32 addition.
+define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1,
+ <4 x float> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vfasb %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <4 x float> %ret
+}
+
+; Test an f32 addition that uses vector registers.
+define float @f2(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: wfasb %f0, %v24, %v26
+; CHECK: br %r14
+ %scalar1 = extractelement <4 x float> %val1, i32 0
+ %scalar2 = extractelement <4 x float> %val2, i32 0
+ %ret = call float @llvm.experimental.constrained.fadd.f32(
+ float %scalar1, float %scalar2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %ret
+}
--- /dev/null
+; Test strict vector division.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+
+; Test a v2f64 division.
+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
+ <2 x double> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vfddb %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <2 x double> %ret
+}
+
+; Test an f64 division that uses vector registers.
+define double @f6(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: wfddb %f0, %v24, %v26
+; CHECK: br %r14
+ %scalar1 = extractelement <2 x double> %val1, i32 0
+ %scalar2 = extractelement <2 x double> %val2, i32 0
+ %ret = call double @llvm.experimental.constrained.fdiv.f64(
+ double %scalar1, double %scalar2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %ret
+}
--- /dev/null
+; Test strict vector division on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+
+; Test a v4f32 division.
+define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1,
+ <4 x float> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vfdsb %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = call <4 x float> @llvm.experimental.constrained.fdiv.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <4 x float> %ret
+}
+
+; Test an f32 division that uses vector registers.
+define float @f2(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: wfdsb %f0, %v24, %v26
+; CHECK: br %r14
+ %scalar1 = extractelement <4 x float> %val1, i32 0
+ %scalar2 = extractelement <4 x float> %val2, i32 0
+ %ret = call float @llvm.experimental.constrained.fdiv.f32(
+ float %scalar1, float %scalar2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %ret
+}
--- /dev/null
+; Test strict vector maximum on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare double @llvm.experimental.constrained.maxnum.f64(double, double, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+
+declare float @llvm.experimental.constrained.maxnum.f32(float, float, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.maxnum.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+
+declare fp128 @llvm.experimental.constrained.maxnum.f128(fp128, fp128, metadata, metadata)
+
+; Test the f64 maxnum intrinsic.
+define double @f1(double %dummy, double %val1, double %val2) {
+; CHECK-LABEL: f1:
+; CHECK: wfmaxdb %f0, %f2, %f4, 4
+; CHECK: br %r14
+ %ret = call double @llvm.experimental.constrained.maxnum.f64(
+ double %val1, double %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %ret
+}
+
+; Test the v2f64 maxnum intrinsic.
+define <2 x double> @f2(<2 x double> %dummy, <2 x double> %val1,
+ <2 x double> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vfmaxdb %v24, %v26, %v28, 4
+; CHECK: br %r14
+ %ret = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <2 x double> %ret
+}
+
+; Test the f32 maxnum intrinsic.
+define float @f3(float %dummy, float %val1, float %val2) {
+; CHECK-LABEL: f3:
+; CHECK: wfmaxsb %f0, %f2, %f4, 4
+; CHECK: br %r14
+ %ret = call float @llvm.experimental.constrained.maxnum.f32(
+ float %val1, float %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %ret
+}
+
+; Test the v4f32 maxnum intrinsic.
+define <4 x float> @f4(<4 x float> %dummy, <4 x float> %val1,
+ <4 x float> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vfmaxsb %v24, %v26, %v28, 4
+; CHECK: br %r14
+ %ret = call <4 x float> @llvm.experimental.constrained.maxnum.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <4 x float> %ret
+}
+
+; Test the f128 maxnum intrinsic.
+define void @f5(fp128 *%ptr1, fp128 *%ptr2, fp128 *%dst) {
+; CHECK-LABEL: f5:
+; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
+; CHECK: wfmaxxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 4
+; CHECK: vst [[RES]], 0(%r4)
+; CHECK: br %r14
+ %val1 = load fp128, fp128* %ptr1
+ %val2 = load fp128, fp128* %ptr2
+ %res = call fp128 @llvm.experimental.constrained.maxnum.f128(
+ fp128 %val1, fp128 %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128* %dst
+ ret void
+}
+
--- /dev/null
+; Test strict vector minimum on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare double @llvm.experimental.constrained.minnum.f64(double, double, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+
+declare float @llvm.experimental.constrained.minnum.f32(float, float, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.minnum.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+
+declare fp128 @llvm.experimental.constrained.minnum.f128(fp128, fp128, metadata, metadata)
+
+; Test the f64 minnum intrinsic.
+define double @f1(double %dummy, double %val1, double %val2) {
+; CHECK-LABEL: f1:
+; CHECK: wfmindb %f0, %f2, %f4, 4
+; CHECK: br %r14
+ %ret = call double @llvm.experimental.constrained.minnum.f64(
+ double %val1, double %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %ret
+}
+
+; Test the v2f64 minnum intrinsic.
+define <2 x double> @f2(<2 x double> %dummy, <2 x double> %val1,
+ <2 x double> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vfmindb %v24, %v26, %v28, 4
+; CHECK: br %r14
+ %ret = call <2 x double> @llvm.experimental.constrained.minnum.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <2 x double> %ret
+}
+
+; Test the f32 minnum intrinsic.
+define float @f3(float %dummy, float %val1, float %val2) {
+; CHECK-LABEL: f3:
+; CHECK: wfminsb %f0, %f2, %f4, 4
+; CHECK: br %r14
+ %ret = call float @llvm.experimental.constrained.minnum.f32(
+ float %val1, float %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %ret
+}
+
+; Test the v4f32 minnum intrinsic.
+define <4 x float> @f4(<4 x float> %dummy, <4 x float> %val1,
+ <4 x float> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vfminsb %v24, %v26, %v28, 4
+; CHECK: br %r14
+ %ret = call <4 x float> @llvm.experimental.constrained.minnum.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <4 x float> %ret
+}
+
+; Test the f128 minnum intrinsic.
+define void @f5(fp128 *%ptr1, fp128 *%ptr2, fp128 *%dst) {
+; CHECK-LABEL: f5:
+; CHECK-DAG: vl [[REG1:%v[0-9]+]], 0(%r2)
+; CHECK-DAG: vl [[REG2:%v[0-9]+]], 0(%r3)
+; CHECK: wfminxb [[RES:%v[0-9]+]], [[REG1]], [[REG2]], 4
+; CHECK: vst [[RES]], 0(%r4)
+; CHECK: br %r14
+ %val1 = load fp128, fp128* %ptr1
+ %val2 = load fp128, fp128* %ptr2
+ %res = call fp128 @llvm.experimental.constrained.minnum.f128(
+ fp128 %val1, fp128 %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ store fp128 %res, fp128* %dst
+ ret void
+}
+
--- /dev/null
+; Test strict vector multiplication.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+
+; Test a v2f64 multiplication.
+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
+ <2 x double> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vfmdb %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <2 x double> %ret
+}
+
+; Test an f64 multiplication that uses vector registers.
+define double @f6(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: wfmdb %f0, %v24, %v26
+; CHECK: br %r14
+ %scalar1 = extractelement <2 x double> %val1, i32 0
+ %scalar2 = extractelement <2 x double> %val2, i32 0
+ %ret = call double @llvm.experimental.constrained.fmul.f64(
+ double %scalar1, double %scalar2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %ret
+}
--- /dev/null
+; Test strict vector multiply-and-add.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
+
+; Test a v2f64 multiply-and-add.
+define <2 x double> @f4(<2 x double> %dummy, <2 x double> %val1,
+ <2 x double> %val2, <2 x double> %val3) {
+; CHECK-LABEL: f4:
+; CHECK: vfmadb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+ %ret = call <2 x double> @llvm.experimental.constrained.fma.v2f64 (
+ <2 x double> %val1,
+ <2 x double> %val2,
+ <2 x double> %val3,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <2 x double> %ret
+}
+
+; Test a v2f64 multiply-and-subtract.
+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
+ <2 x double> %val2, <2 x double> %val3) {
+; CHECK-LABEL: f5:
+; CHECK: vfmsdb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+ %negval3 = fsub <2 x double> <double -0.0, double -0.0>, %val3
+ %ret = call <2 x double> @llvm.experimental.constrained.fma.v2f64 (
+ <2 x double> %val1,
+ <2 x double> %val2,
+ <2 x double> %negval3,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <2 x double> %ret
+}
--- /dev/null
+; Test strict vector multiplication on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+
+; Test a v4f32 multiplication.
+define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1,
+ <4 x float> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vfmsb %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <4 x float> %ret
+}
+
+; Test an f32 multiplication that uses vector registers.
+define float @f2(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: wfmsb %f0, %v24, %v26
+; CHECK: br %r14
+ %scalar1 = extractelement <4 x float> %val1, i32 0
+ %scalar2 = extractelement <4 x float> %val2, i32 0
+ %ret = call float @llvm.experimental.constrained.fmul.f32(
+ float %scalar1, float %scalar2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %ret
+}
--- /dev/null
+; Test strict vector multiply-and-add on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
+
+; Test a v4f32 multiply-and-add.
+define <4 x float> @f1(<4 x float> %dummy, <4 x float> %val1,
+ <4 x float> %val2, <4 x float> %val3) {
+; CHECK-LABEL: f1:
+; CHECK: vfmasb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+ %ret = call <4 x float> @llvm.experimental.constrained.fma.v4f32 (
+ <4 x float> %val1,
+ <4 x float> %val2,
+ <4 x float> %val3,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <4 x float> %ret
+}
+
+; Test a v4f32 multiply-and-subtract.
+define <4 x float> @f2(<4 x float> %dummy, <4 x float> %val1,
+ <4 x float> %val2, <4 x float> %val3) {
+; CHECK-LABEL: f2:
+; CHECK: vfmssb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+ %negval3 = fsub <4 x float> <float -0.0, float -0.0,
+ float -0.0, float -0.0>, %val3
+ %ret = call <4 x float> @llvm.experimental.constrained.fma.v4f32 (
+ <4 x float> %val1,
+ <4 x float> %val2,
+ <4 x float> %negval3,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <4 x float> %ret
+}
--- /dev/null
+; Test strict vector negative multiply-and-add on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
+
+; Test a v2f64 negative multiply-and-add.
+define <2 x double> @f1(<2 x double> %dummy, <2 x double> %val1,
+ <2 x double> %val2, <2 x double> %val3) {
+; CHECK-LABEL: f1:
+; CHECK: vfnmadb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+ %ret = call <2 x double> @llvm.experimental.constrained.fma.v2f64 (
+ <2 x double> %val1,
+ <2 x double> %val2,
+ <2 x double> %val3,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %negret = fsub <2 x double> <double -0.0, double -0.0>, %ret
+ ret <2 x double> %negret
+}
+
+; Test a v2f64 negative multiply-and-subtract.
+define <2 x double> @f2(<2 x double> %dummy, <2 x double> %val1,
+ <2 x double> %val2, <2 x double> %val3) {
+; CHECK-LABEL: f2:
+; CHECK: vfnmsdb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+ %negval3 = fsub <2 x double> <double -0.0, double -0.0>, %val3
+ %ret = call <2 x double> @llvm.experimental.constrained.fma.v2f64 (
+ <2 x double> %val1,
+ <2 x double> %val2,
+ <2 x double> %negval3,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %negret = fsub <2 x double> <double -0.0, double -0.0>, %ret
+ ret <2 x double> %negret
+}
+
+; Test a v4f32 negative multiply-and-add.
+define <4 x float> @f3(<4 x float> %dummy, <4 x float> %val1,
+ <4 x float> %val2, <4 x float> %val3) {
+; CHECK-LABEL: f3:
+; CHECK: vfnmasb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+ %ret = call <4 x float> @llvm.experimental.constrained.fma.v4f32 (
+ <4 x float> %val1,
+ <4 x float> %val2,
+ <4 x float> %val3,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %negret = fsub <4 x float> <float -0.0, float -0.0,
+ float -0.0, float -0.0>, %ret
+ ret <4 x float> %negret
+}
+
+; Test a v4f32 negative multiply-and-subtract.
+define <4 x float> @f4(<4 x float> %dummy, <4 x float> %val1,
+ <4 x float> %val2, <4 x float> %val3) {
+; CHECK-LABEL: f4:
+; CHECK: vfnmssb %v24, %v26, %v28, %v30
+; CHECK: br %r14
+ %negval3 = fsub <4 x float> <float -0.0, float -0.0,
+ float -0.0, float -0.0>, %val3
+ %ret = call <4 x float> @llvm.experimental.constrained.fma.v4f32 (
+ <4 x float> %val1,
+ <4 x float> %val2,
+ <4 x float> %negval3,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ %negret = fsub <4 x float> <float -0.0, float -0.0,
+ float -0.0, float -0.0>, %ret
+ ret <4 x float> %negret
+}
--- /dev/null
+; Test strict v2f64 rounding.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.floor.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.ceil.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.trunc.f64(double, metadata, metadata)
+declare double @llvm.experimental.constrained.round.f64(double, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata, metadata)
+
+define <2 x double> @f1(<2 x double> %val) {
+; CHECK-LABEL: f1:
+; CHECK: vfidb %v24, %v24, 0, 0
+; CHECK: br %r14
+ %res = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
+ <2 x double> %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <2 x double> %res
+}
+
+define <2 x double> @f2(<2 x double> %val) {
+; CHECK-LABEL: f2:
+; CHECK: vfidb %v24, %v24, 4, 0
+; CHECK: br %r14
+ %res = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
+ <2 x double> %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <2 x double> %res
+}
+
+define <2 x double> @f3(<2 x double> %val) {
+; CHECK-LABEL: f3:
+; CHECK: vfidb %v24, %v24, 4, 7
+; CHECK: br %r14
+ %res = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
+ <2 x double> %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <2 x double> %res
+}
+
+define <2 x double> @f4(<2 x double> %val) {
+; CHECK-LABEL: f4:
+; CHECK: vfidb %v24, %v24, 4, 6
+; CHECK: br %r14
+ %res = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
+ <2 x double> %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <2 x double> %res
+}
+
+define <2 x double> @f5(<2 x double> %val) {
+; CHECK-LABEL: f5:
+; CHECK: vfidb %v24, %v24, 4, 5
+; CHECK: br %r14
+ %res = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
+ <2 x double> %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <2 x double> %res
+}
+
+define <2 x double> @f6(<2 x double> %val) {
+; CHECK-LABEL: f6:
+; CHECK: vfidb %v24, %v24, 4, 1
+; CHECK: br %r14
+ %res = call <2 x double> @llvm.experimental.constrained.round.v2f64(
+ <2 x double> %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <2 x double> %res
+}
+
+define double @f7(<2 x double> %val) {
+; CHECK-LABEL: f7:
+; CHECK: wfidb %f0, %v24, 0, 0
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %res = call double @llvm.experimental.constrained.rint.f64(
+ double %scalar,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define double @f8(<2 x double> %val) {
+; CHECK-LABEL: f8:
+; CHECK: wfidb %f0, %v24, 4, 0
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %res = call double @llvm.experimental.constrained.nearbyint.f64(
+ double %scalar,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define double @f9(<2 x double> %val) {
+; CHECK-LABEL: f9:
+; CHECK: wfidb %f0, %v24, 4, 7
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %res = call double @llvm.experimental.constrained.floor.f64(
+ double %scalar,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define double @f10(<2 x double> %val) {
+; CHECK-LABEL: f10:
+; CHECK: wfidb %f0, %v24, 4, 6
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %res = call double @llvm.experimental.constrained.ceil.f64(
+ double %scalar,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define double @f11(<2 x double> %val) {
+; CHECK-LABEL: f11:
+; CHECK: wfidb %f0, %v24, 4, 5
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %res = call double @llvm.experimental.constrained.trunc.f64(
+ double %scalar,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
+
+define double @f12(<2 x double> %val) {
+; CHECK-LABEL: f12:
+; CHECK: wfidb %f0, %v24, 4, 1
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %res = call double @llvm.experimental.constrained.round.f64(
+ double %scalar,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %res
+}
--- /dev/null
+; Test strict v4f32 rounding on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.floor.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.ceil.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.trunc.f32(float, metadata, metadata)
+declare float @llvm.experimental.constrained.round.f32(float, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.rint.v4f32(<4 x float>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.round.v4f32(<4 x float>, metadata, metadata)
+
+define <4 x float> @f1(<4 x float> %val) {
+; CHECK-LABEL: f1:
+; CHECK: vfisb %v24, %v24, 0, 0
+; CHECK: br %r14
+ %res = call <4 x float> @llvm.experimental.constrained.rint.v4f32(
+ <4 x float> %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <4 x float> %res
+}
+
+define <4 x float> @f2(<4 x float> %val) {
+; CHECK-LABEL: f2:
+; CHECK: vfisb %v24, %v24, 4, 0
+; CHECK: br %r14
+ %res = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(
+ <4 x float> %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <4 x float> %res
+}
+
+define <4 x float> @f3(<4 x float> %val) {
+; CHECK-LABEL: f3:
+; CHECK: vfisb %v24, %v24, 4, 7
+; CHECK: br %r14
+ %res = call <4 x float> @llvm.experimental.constrained.floor.v4f32(
+ <4 x float> %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <4 x float> %res
+}
+
+define <4 x float> @f4(<4 x float> %val) {
+; CHECK-LABEL: f4:
+; CHECK: vfisb %v24, %v24, 4, 6
+; CHECK: br %r14
+ %res = call <4 x float> @llvm.experimental.constrained.ceil.v4f32(
+ <4 x float> %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <4 x float> %res
+}
+
+define <4 x float> @f5(<4 x float> %val) {
+; CHECK-LABEL: f5:
+; CHECK: vfisb %v24, %v24, 4, 5
+; CHECK: br %r14
+ %res = call <4 x float> @llvm.experimental.constrained.trunc.v4f32(
+ <4 x float> %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <4 x float> %res
+}
+
+define <4 x float> @f6(<4 x float> %val) {
+; CHECK-LABEL: f6:
+; CHECK: vfisb %v24, %v24, 4, 1
+; CHECK: br %r14
+ %res = call <4 x float> @llvm.experimental.constrained.round.v4f32(
+ <4 x float> %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <4 x float> %res
+}
+
+define float @f7(<4 x float> %val) {
+; CHECK-LABEL: f7:
+; CHECK: wfisb %f0, %v24, 0, 0
+; CHECK: br %r14
+ %scalar = extractelement <4 x float> %val, i32 0
+ %res = call float @llvm.experimental.constrained.rint.f32(
+ float %scalar,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define float @f8(<4 x float> %val) {
+; CHECK-LABEL: f8:
+; CHECK: wfisb %f0, %v24, 4, 0
+; CHECK: br %r14
+ %scalar = extractelement <4 x float> %val, i32 0
+ %res = call float @llvm.experimental.constrained.nearbyint.f32(
+ float %scalar,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define float @f9(<4 x float> %val) {
+; CHECK-LABEL: f9:
+; CHECK: wfisb %f0, %v24, 4, 7
+; CHECK: br %r14
+ %scalar = extractelement <4 x float> %val, i32 0
+ %res = call float @llvm.experimental.constrained.floor.f32(
+ float %scalar,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define float @f10(<4 x float> %val) {
+; CHECK-LABEL: f10:
+; CHECK: wfisb %f0, %v24, 4, 6
+; CHECK: br %r14
+ %scalar = extractelement <4 x float> %val, i32 0
+ %res = call float @llvm.experimental.constrained.ceil.f32(
+ float %scalar,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define float @f11(<4 x float> %val) {
+; CHECK-LABEL: f11:
+; CHECK: wfisb %f0, %v24, 4, 5
+; CHECK: br %r14
+ %scalar = extractelement <4 x float> %val, i32 0
+ %res = call float @llvm.experimental.constrained.trunc.f32(
+ float %scalar,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
+
+define float @f12(<4 x float> %val) {
+; CHECK-LABEL: f12:
+; CHECK: wfisb %f0, %v24, 4, 1
+; CHECK: br %r14
+ %scalar = extractelement <4 x float> %val, i32 0
+ %res = call float @llvm.experimental.constrained.round.f32(
+ float %scalar,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %res
+}
--- /dev/null
+; Test strict f64 and v2f64 square root.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
+
+define <2 x double> @f1(<2 x double> %val) {
+; CHECK-LABEL: f1:
+; CHECK: vfsqdb %v24, %v24
+; CHECK: br %r14
+ %ret = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
+ <2 x double> %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <2 x double> %ret
+}
+
+define double @f2(<2 x double> %val) {
+; CHECK-LABEL: f2:
+; CHECK: wfsqdb %f0, %v24
+; CHECK: br %r14
+ %scalar = extractelement <2 x double> %val, i32 0
+ %ret = call double @llvm.experimental.constrained.sqrt.f64(
+ double %scalar,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %ret
+}
--- /dev/null
+; Test strict f32 and v4f32 square root on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float>, metadata, metadata)
+
+define <4 x float> @f1(<4 x float> %val) {
+; CHECK-LABEL: f1:
+; CHECK: vfsqsb %v24, %v24
+; CHECK: br %r14
+ %ret = call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(
+ <4 x float> %val,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <4 x float> %ret
+}
+
+define float @f2(<4 x float> %val) {
+; CHECK-LABEL: f2:
+; CHECK: wfsqsb %f0, %v24
+; CHECK: br %r14
+ %scalar = extractelement <4 x float> %val, i32 0
+ %ret = call float @llvm.experimental.constrained.sqrt.f32(
+ float %scalar,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %ret
+}
--- /dev/null
+; Test strict vector subtraction.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+
+; Test a v2f64 subtraction.
+define <2 x double> @f6(<2 x double> %dummy, <2 x double> %val1,
+ <2 x double> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vfsdb %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(
+ <2 x double> %val1, <2 x double> %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <2 x double> %ret
+}
+
+; Test an f64 subtraction that uses vector registers.
+define double @f7(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: wfsdb %f0, %v24, %v26
+; CHECK: br %r14
+ %scalar1 = extractelement <2 x double> %val1, i32 0
+ %scalar2 = extractelement <2 x double> %val2, i32 0
+ %ret = call double @llvm.experimental.constrained.fsub.f64(
+ double %scalar1, double %scalar2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret double %ret
+}
+
--- /dev/null
+; Test strict vector subtraction on z14.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s
+
+declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float>, <4 x float>, metadata, metadata)
+
+; Test a v4f32 subtraction.
+define <4 x float> @f6(<4 x float> %dummy, <4 x float> %val1,
+ <4 x float> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vfssb %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = call <4 x float> @llvm.experimental.constrained.fsub.v4f32(
+ <4 x float> %val1, <4 x float> %val2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret <4 x float> %ret
+}
+
+; Test an f32 subtraction that uses vector registers.
+define float @f7(<4 x float> %val1, <4 x float> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: wfssb %f0, %v24, %v26
+; CHECK: br %r14
+ %scalar1 = extractelement <4 x float> %val1, i32 0
+ %scalar2 = extractelement <4 x float> %val2, i32 0
+ %ret = call float @llvm.experimental.constrained.fsub.f32(
+ float %scalar1, float %scalar2,
+ metadata !"round.dynamic",
+ metadata !"fpexcept.strict")
+ ret float %ret
+}
; S390X-NEXT: larl %r1, .LCPI1_0
; S390X-NEXT: ldeb %f1, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI1_1
-; S390X-NEXT: ldeb %f0, 0(%r1)
-; S390X-NEXT: larl %r1, .LCPI1_2
; S390X-NEXT: ldeb %f2, 0(%r1)
-; S390X-NEXT: ddbr %f0, %f1
+; S390X-NEXT: larl %r1, .LCPI1_2
+; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: ddbr %f2, %f1
+; S390X-NEXT: ddbr %f0, %f1
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fdiv_v2f64:
; S390X-NEXT: larl %r1, .LCPI2_0
; S390X-NEXT: le %f1, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI2_1
-; S390X-NEXT: le %f0, 0(%r1)
+; S390X-NEXT: le %f4, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI2_2
; S390X-NEXT: le %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI2_3
-; S390X-NEXT: le %f4, 0(%r1)
-; S390X-NEXT: debr %f0, %f1
-; S390X-NEXT: debr %f2, %f1
+; S390X-NEXT: le %f0, 0(%r1)
; S390X-NEXT: debr %f4, %f1
+; S390X-NEXT: debr %f2, %f1
+; S390X-NEXT: debr %f0, %f1
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fdiv_v3f32:
define void @constrained_vector_fdiv_v3f64(<3 x double>* %a) {
; S390X-LABEL: constrained_vector_fdiv_v3f64:
; S390X: # %bb.0: # %entry
-; S390X-NEXT: larl %r1, .LCPI3_1
-; S390X-NEXT: ldeb %f0, 0(%r1)
-; S390X-NEXT: larl %r1, .LCPI3_2
-; S390X-NEXT: ldeb %f1, 0(%r1)
+; S390X-NEXT: ld %f0, 16(%r2)
+; S390X-NEXT: ld %f1, 8(%r2)
; S390X-NEXT: larl %r1, .LCPI3_0
; S390X-NEXT: ldeb %f2, 0(%r1)
-; S390X-NEXT: ddb %f1, 16(%r2)
-; S390X-NEXT: ddb %f0, 8(%r2)
+; S390X-NEXT: larl %r1, .LCPI3_1
+; S390X-NEXT: ldeb %f3, 0(%r1)
+; S390X-NEXT: larl %r1, .LCPI3_2
+; S390X-NEXT: ldeb %f4, 0(%r1)
; S390X-NEXT: ddb %f2, 0(%r2)
-; S390X-NEXT: std %f1, 16(%r2)
-; S390X-NEXT: std %f0, 8(%r2)
+; S390X-NEXT: ddbr %f3, %f1
+; S390X-NEXT: ddbr %f4, %f0
+; S390X-NEXT: std %f4, 16(%r2)
+; S390X-NEXT: std %f3, 8(%r2)
; S390X-NEXT: std %f2, 0(%r2)
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fdiv_v3f64:
; SZ13: # %bb.0: # %entry
; SZ13-NEXT: larl %r1, .LCPI3_0
-; SZ13-NEXT: vl %v0, 0(%r2)
-; SZ13-NEXT: vl %v1, 0(%r1)
-; SZ13-NEXT: vfddb %v0, %v1, %v0
-; SZ13-NEXT: larl %r1, .LCPI3_1
; SZ13-NEXT: ldeb %f1, 0(%r1)
; SZ13-NEXT: ddb %f1, 16(%r2)
+; SZ13-NEXT: larl %r1, .LCPI3_1
+; SZ13-NEXT: vl %v0, 0(%r2)
+; SZ13-NEXT: vl %v2, 0(%r1)
; SZ13-NEXT: std %f1, 16(%r2)
+; SZ13-NEXT: vfddb %v0, %v2, %v0
; SZ13-NEXT: vst %v0, 0(%r2)
; SZ13-NEXT: br %r14
entry:
; S390X-NEXT: larl %r1, .LCPI4_0
; S390X-NEXT: ldeb %f1, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI4_1
-; S390X-NEXT: ldeb %f0, 0(%r1)
+; S390X-NEXT: ldeb %f6, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI4_2
-; S390X-NEXT: ldeb %f2, 0(%r1)
-; S390X-NEXT: larl %r1, .LCPI4_3
; S390X-NEXT: ldeb %f4, 0(%r1)
+; S390X-NEXT: larl %r1, .LCPI4_3
+; S390X-NEXT: ldeb %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI4_4
-; S390X-NEXT: ldeb %f6, 0(%r1)
-; S390X-NEXT: ddbr %f0, %f1
-; S390X-NEXT: ddbr %f2, %f1
-; S390X-NEXT: ddbr %f4, %f1
+; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: ddbr %f6, %f1
+; S390X-NEXT: ddbr %f4, %f1
+; S390X-NEXT: ddbr %f2, %f1
+; S390X-NEXT: ddbr %f0, %f1
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fdiv_v4f64:
; SZ13-NEXT: vl %v0, 0(%r1)
; SZ13-NEXT: larl %r1, .LCPI4_1
; SZ13-NEXT: vl %v1, 0(%r1)
-; SZ13-NEXT: vfddb %v24, %v1, %v0
+; SZ13-NEXT: vfddb %v26, %v1, %v0
; SZ13-NEXT: larl %r1, .LCPI4_2
; SZ13-NEXT: vl %v1, 0(%r1)
-; SZ13-NEXT: vfddb %v26, %v1, %v0
+; SZ13-NEXT: vfddb %v24, %v1, %v0
; SZ13-NEXT: br %r14
entry:
%div = call <4 x double> @llvm.experimental.constrained.fdiv.v4f64(
; S390X-LABEL: constrained_vector_fmul_v2f64:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI11_0
-; S390X-NEXT: ldeb %f0, 0(%r1)
+; S390X-NEXT: ldeb %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI11_1
; S390X-NEXT: ld %f1, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI11_2
-; S390X-NEXT: ldeb %f2, 0(%r1)
-; S390X-NEXT: mdbr %f0, %f1
+; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: mdbr %f2, %f1
+; S390X-NEXT: mdbr %f0, %f1
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fmul_v2f64:
; S390X-LABEL: constrained_vector_fmul_v3f32:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI12_0
-; S390X-NEXT: le %f4, 0(%r1)
+; S390X-NEXT: le %f0, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI12_1
-; S390X-NEXT: ler %f0, %f4
-; S390X-NEXT: meeb %f0, 0(%r1)
+; S390X-NEXT: ler %f4, %f0
+; S390X-NEXT: meeb %f4, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI12_2
-; S390X-NEXT: ler %f2, %f4
+; S390X-NEXT: ler %f2, %f0
; S390X-NEXT: meeb %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI12_3
-; S390X-NEXT: meeb %f4, 0(%r1)
+; S390X-NEXT: meeb %f0, 0(%r1)
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fmul_v3f32:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI13_0
; S390X-NEXT: ld %f0, 0(%r1)
-; S390X-NEXT: ldr %f1, %f0
-; S390X-NEXT: ldr %f2, %f0
-; S390X-NEXT: mdb %f0, 16(%r2)
-; S390X-NEXT: mdb %f2, 8(%r2)
-; S390X-NEXT: mdb %f1, 0(%r2)
-; S390X-NEXT: std %f0, 16(%r2)
-; S390X-NEXT: std %f2, 8(%r2)
-; S390X-NEXT: std %f1, 0(%r2)
+; S390X-NEXT: ld %f1, 8(%r2)
+; S390X-NEXT: ld %f2, 16(%r2)
+; S390X-NEXT: ldr %f3, %f0
+; S390X-NEXT: mdb %f3, 0(%r2)
+; S390X-NEXT: mdbr %f1, %f0
+; S390X-NEXT: mdbr %f2, %f0
+; S390X-NEXT: std %f2, 16(%r2)
+; S390X-NEXT: std %f1, 8(%r2)
+; S390X-NEXT: std %f3, 0(%r2)
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fmul_v3f64:
; SZ13: # %bb.0: # %entry
; SZ13-NEXT: larl %r1, .LCPI13_0
-; SZ13-NEXT: vl %v0, 0(%r2)
-; SZ13-NEXT: vl %v1, 0(%r1)
-; SZ13-NEXT: larl %r1, .LCPI13_1
-; SZ13-NEXT: vfmdb %v0, %v1, %v0
; SZ13-NEXT: ld %f1, 0(%r1)
+; SZ13-NEXT: larl %r1, .LCPI13_1
+; SZ13-NEXT: vl %v0, 0(%r2)
+; SZ13-NEXT: vl %v2, 0(%r1)
; SZ13-NEXT: mdb %f1, 16(%r2)
+; SZ13-NEXT: vfmdb %v0, %v2, %v0
; SZ13-NEXT: std %f1, 16(%r2)
; SZ13-NEXT: vst %v0, 0(%r2)
; SZ13-NEXT: br %r14
; S390X-LABEL: constrained_vector_fmul_v4f64:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI14_0
-; S390X-NEXT: ldeb %f0, 0(%r1)
+; S390X-NEXT: ldeb %f6, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI14_1
; S390X-NEXT: ld %f1, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI14_2
-; S390X-NEXT: ldeb %f2, 0(%r1)
-; S390X-NEXT: larl %r1, .LCPI14_3
; S390X-NEXT: ldeb %f4, 0(%r1)
+; S390X-NEXT: larl %r1, .LCPI14_3
+; S390X-NEXT: ldeb %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI14_4
-; S390X-NEXT: ldeb %f6, 0(%r1)
-; S390X-NEXT: mdbr %f0, %f1
-; S390X-NEXT: mdbr %f2, %f1
-; S390X-NEXT: mdbr %f4, %f1
+; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: mdbr %f6, %f1
+; S390X-NEXT: mdbr %f4, %f1
+; S390X-NEXT: mdbr %f2, %f1
+; S390X-NEXT: mdbr %f0, %f1
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fmul_v4f64:
; SZ13-NEXT: larl %r1, .LCPI14_1
; SZ13-NEXT: vl %v1, 0(%r1)
; SZ13-NEXT: larl %r1, .LCPI14_2
-; SZ13-NEXT: vfmdb %v24, %v1, %v0
-; SZ13-NEXT: vl %v0, 0(%r1)
; SZ13-NEXT: vfmdb %v26, %v1, %v0
+; SZ13-NEXT: vl %v0, 0(%r1)
+; SZ13-NEXT: vfmdb %v24, %v1, %v0
; SZ13-NEXT: br %r14
entry:
%mul = call <4 x double> @llvm.experimental.constrained.fmul.v4f64(
; S390X-LABEL: constrained_vector_fadd_v2f64:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI16_0
+; S390X-NEXT: ld %f1, 0(%r1)
+; S390X-NEXT: larl %r1, .LCPI16_2
; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI16_1
-; S390X-NEXT: ld %f2, 0(%r1)
-; S390X-NEXT: adbr %f0, %f2
-; S390X-NEXT: larl %r1, .LCPI16_2
+; S390X-NEXT: ldr %f2, %f1
; S390X-NEXT: adb %f2, 0(%r1)
+; S390X-NEXT: adbr %f0, %f1
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fadd_v2f64:
; S390X-LABEL: constrained_vector_fadd_v3f32:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI17_0
-; S390X-NEXT: le %f1, 0(%r1)
+; S390X-NEXT: le %f0, 0(%r1)
+; S390X-NEXT: lzer %f4
+; S390X-NEXT: aebr %f4, %f0
; S390X-NEXT: larl %r1, .LCPI17_1
-; S390X-NEXT: ler %f2, %f1
-; S390X-NEXT: ler %f0, %f1
-; S390X-NEXT: aeb %f0, 0(%r1)
-; S390X-NEXT: larl %r1, .LCPI17_2
+; S390X-NEXT: ler %f2, %f0
; S390X-NEXT: aeb %f2, 0(%r1)
-; S390X-NEXT: lzer %f4
-; S390X-NEXT: aebr %f4, %f1
+; S390X-NEXT: larl %r1, .LCPI17_2
+; S390X-NEXT: aeb %f0, 0(%r1)
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fadd_v3f32:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI18_0
; S390X-NEXT: ld %f0, 0(%r1)
-; S390X-NEXT: ldr %f1, %f0
-; S390X-NEXT: ldr %f2, %f0
-; S390X-NEXT: adb %f0, 16(%r2)
-; S390X-NEXT: adb %f2, 8(%r2)
-; S390X-NEXT: adb %f1, 0(%r2)
-; S390X-NEXT: std %f0, 16(%r2)
-; S390X-NEXT: std %f2, 8(%r2)
-; S390X-NEXT: std %f1, 0(%r2)
+; S390X-NEXT: ld %f1, 8(%r2)
+; S390X-NEXT: ld %f2, 16(%r2)
+; S390X-NEXT: ldr %f3, %f0
+; S390X-NEXT: adb %f3, 0(%r2)
+; S390X-NEXT: adbr %f1, %f0
+; S390X-NEXT: adbr %f2, %f0
+; S390X-NEXT: std %f2, 16(%r2)
+; S390X-NEXT: std %f1, 8(%r2)
+; S390X-NEXT: std %f3, 0(%r2)
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fadd_v3f64:
; SZ13: # %bb.0: # %entry
; SZ13-NEXT: larl %r1, .LCPI18_0
-; SZ13-NEXT: vl %v0, 0(%r2)
-; SZ13-NEXT: vl %v1, 0(%r1)
-; SZ13-NEXT: larl %r1, .LCPI18_1
-; SZ13-NEXT: vfadb %v0, %v1, %v0
; SZ13-NEXT: ld %f1, 0(%r1)
+; SZ13-NEXT: larl %r1, .LCPI18_1
+; SZ13-NEXT: vl %v0, 0(%r2)
+; SZ13-NEXT: vl %v2, 0(%r1)
; SZ13-NEXT: adb %f1, 16(%r2)
+; SZ13-NEXT: vfadb %v0, %v2, %v0
; SZ13-NEXT: std %f1, 16(%r2)
; SZ13-NEXT: vst %v0, 0(%r2)
; SZ13-NEXT: br %r14
; S390X-LABEL: constrained_vector_fadd_v4f64:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI19_0
-; S390X-NEXT: ldeb %f0, 0(%r1)
+; S390X-NEXT: ld %f1, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI19_1
-; S390X-NEXT: ld %f6, 0(%r1)
-; S390X-NEXT: larl %r1, .LCPI19_3
-; S390X-NEXT: ldeb %f4, 0(%r1)
-; S390X-NEXT: adbr %f0, %f6
+; S390X-NEXT: ldr %f2, %f1
+; S390X-NEXT: ldr %f6, %f1
+; S390X-NEXT: adb %f6, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI19_2
-; S390X-NEXT: ldr %f2, %f6
-; S390X-NEXT: adb %f2, 0(%r1)
-; S390X-NEXT: adbr %f4, %f6
+; S390X-NEXT: ldeb %f4, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI19_4
-; S390X-NEXT: adb %f6, 0(%r1)
+; S390X-NEXT: ldeb %f0, 0(%r1)
+; S390X-NEXT: larl %r1, .LCPI19_3
+; S390X-NEXT: adb %f2, 0(%r1)
+; S390X-NEXT: adbr %f4, %f1
+; S390X-NEXT: adbr %f0, %f1
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fadd_v4f64:
; SZ13-NEXT: larl %r1, .LCPI19_1
; SZ13-NEXT: vl %v1, 0(%r1)
; SZ13-NEXT: larl %r1, .LCPI19_2
-; SZ13-NEXT: vfadb %v24, %v1, %v0
-; SZ13-NEXT: vl %v0, 0(%r1)
; SZ13-NEXT: vfadb %v26, %v1, %v0
+; SZ13-NEXT: vl %v0, 0(%r1)
+; SZ13-NEXT: vfadb %v24, %v1, %v0
; SZ13-NEXT: br %r14
entry:
%add = call <4 x double> @llvm.experimental.constrained.fadd.v4f64(
define <2 x double> @constrained_vector_fsub_v2f64() {
; S390X-LABEL: constrained_vector_fsub_v2f64:
; S390X: # %bb.0: # %entry
-; S390X-NEXT: larl %r1, .LCPI21_1
-; S390X-NEXT: ld %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI21_0
-; S390X-NEXT: ldeb %f1, 0(%r1)
-; S390X-NEXT: ldr %f0, %f2
+; S390X-NEXT: ld %f0, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI21_2
+; S390X-NEXT: ldeb %f1, 0(%r1)
+; S390X-NEXT: larl %r1, .LCPI21_1
+; S390X-NEXT: ldr %f2, %f0
; S390X-NEXT: sdb %f2, 0(%r1)
; S390X-NEXT: sdbr %f0, %f1
; S390X-NEXT: br %r14
; S390X-LABEL: constrained_vector_fsub_v3f32:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI22_0
-; S390X-NEXT: le %f4, 0(%r1)
-; S390X-NEXT: larl %r1, .LCPI22_1
-; S390X-NEXT: ler %f0, %f4
-; S390X-NEXT: seb %f0, 0(%r1)
-; S390X-NEXT: larl %r1, .LCPI22_2
-; S390X-NEXT: ler %f2, %f4
-; S390X-NEXT: seb %f2, 0(%r1)
+; S390X-NEXT: le %f0, 0(%r1)
; S390X-NEXT: lzer %f1
+; S390X-NEXT: ler %f4, %f0
; S390X-NEXT: sebr %f4, %f1
+; S390X-NEXT: larl %r1, .LCPI22_1
+; S390X-NEXT: ler %f2, %f0
+; S390X-NEXT: seb %f2, 0(%r1)
+; S390X-NEXT: larl %r1, .LCPI22_2
+; S390X-NEXT: seb %f0, 0(%r1)
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fsub_v3f32:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI23_0
; S390X-NEXT: ld %f0, 0(%r1)
-; S390X-NEXT: ldr %f1, %f0
-; S390X-NEXT: ldr %f2, %f0
-; S390X-NEXT: sdb %f0, 16(%r2)
-; S390X-NEXT: sdb %f2, 8(%r2)
-; S390X-NEXT: sdb %f1, 0(%r2)
+; S390X-NEXT: ld %f1, 8(%r2)
+; S390X-NEXT: ld %f2, 16(%r2)
+; S390X-NEXT: ldr %f3, %f0
+; S390X-NEXT: sdb %f3, 0(%r2)
+; S390X-NEXT: ldr %f4, %f0
+; S390X-NEXT: sdbr %f4, %f1
+; S390X-NEXT: sdbr %f0, %f2
; S390X-NEXT: std %f0, 16(%r2)
-; S390X-NEXT: std %f2, 8(%r2)
-; S390X-NEXT: std %f1, 0(%r2)
+; S390X-NEXT: std %f4, 8(%r2)
+; S390X-NEXT: std %f3, 0(%r2)
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fsub_v3f64:
; SZ13: # %bb.0: # %entry
; SZ13-NEXT: vl %v0, 0(%r2)
+; SZ13-NEXT: vgmg %v2, 12, 10
+; SZ13-NEXT: sdb %f2, 16(%r2)
; SZ13-NEXT: vgmg %v1, 12, 10
; SZ13-NEXT: vfsdb %v0, %v1, %v0
-; SZ13-NEXT: sdb %f1, 16(%r2)
-; SZ13-NEXT: std %f1, 16(%r2)
+; SZ13-NEXT: std %f2, 16(%r2)
; SZ13-NEXT: vst %v0, 0(%r2)
; SZ13-NEXT: br %r14
entry:
define <4 x double> @constrained_vector_fsub_v4f64() {
; S390X-LABEL: constrained_vector_fsub_v4f64:
; S390X: # %bb.0: # %entry
-; S390X-NEXT: larl %r1, .LCPI24_1
-; S390X-NEXT: ld %f6, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI24_0
-; S390X-NEXT: ldeb %f1, 0(%r1)
-; S390X-NEXT: ldr %f0, %f6
+; S390X-NEXT: ld %f0, 0(%r1)
+; S390X-NEXT: larl %r1, .LCPI24_1
+; S390X-NEXT: ldr %f6, %f0
+; S390X-NEXT: sdb %f6, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI24_2
-; S390X-NEXT: ldr %f2, %f6
-; S390X-NEXT: sdb %f2, 0(%r1)
-; S390X-NEXT: larl %r1, .LCPI24_3
-; S390X-NEXT: ldeb %f3, 0(%r1)
-; S390X-NEXT: ldr %f4, %f6
+; S390X-NEXT: ldeb %f1, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI24_4
-; S390X-NEXT: sdb %f6, 0(%r1)
-; S390X-NEXT: sdbr %f0, %f1
-; S390X-NEXT: sdbr %f4, %f3
+; S390X-NEXT: ldeb %f3, 0(%r1)
+; S390X-NEXT: larl %r1, .LCPI24_3
+; S390X-NEXT: ldr %f2, %f0
+; S390X-NEXT: sdb %f2, 0(%r1)
+; S390X-NEXT: ldr %f4, %f0
+; S390X-NEXT: sdbr %f4, %f1
+; S390X-NEXT: sdbr %f0, %f3
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fsub_v4f64:
; SZ13-NEXT: vl %v0, 0(%r1)
; SZ13-NEXT: vgmg %v1, 12, 10
; SZ13-NEXT: larl %r1, .LCPI24_1
-; SZ13-NEXT: vfsdb %v24, %v1, %v0
-; SZ13-NEXT: vl %v0, 0(%r1)
; SZ13-NEXT: vfsdb %v26, %v1, %v0
+; SZ13-NEXT: vl %v0, 0(%r1)
+; SZ13-NEXT: vfsdb %v24, %v1, %v0
; SZ13-NEXT: br %r14
entry:
%sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(
; S390X-LABEL: constrained_vector_sqrt_v3f32:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI27_0
-; S390X-NEXT: sqeb %f0, 0(%r1)
+; S390X-NEXT: sqeb %f4, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI27_1
; S390X-NEXT: sqeb %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI27_2
-; S390X-NEXT: sqeb %f4, 0(%r1)
+; S390X-NEXT: sqeb %f0, 0(%r1)
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_sqrt_v3f32:
define void @constrained_vector_sqrt_v3f64(<3 x double>* %a) {
; S390X-LABEL: constrained_vector_sqrt_v3f64:
; S390X: # %bb.0: # %entry
-; S390X-NEXT: sqdb %f0, 16(%r2)
-; S390X-NEXT: sqdb %f1, 8(%r2)
+; S390X-NEXT: ld %f0, 8(%r2)
+; S390X-NEXT: ld %f1, 16(%r2)
; S390X-NEXT: sqdb %f2, 0(%r2)
-; S390X-NEXT: std %f0, 16(%r2)
-; S390X-NEXT: std %f1, 8(%r2)
+; S390X-NEXT: sqdbr %f0, %f0
+; S390X-NEXT: sqdbr %f1, %f1
+; S390X-NEXT: std %f1, 16(%r2)
+; S390X-NEXT: std %f0, 8(%r2)
; S390X-NEXT: std %f2, 0(%r2)
; S390X-NEXT: br %r14
;
; S390X-LABEL: constrained_vector_sqrt_v4f64:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI29_0
-; S390X-NEXT: sqdb %f2, 0(%r1)
+; S390X-NEXT: sqdb %f6, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI29_1
; S390X-NEXT: sqdb %f4, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI29_3
; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI29_2
-; S390X-NEXT: sqdb %f6, 0(%r1)
+; S390X-NEXT: sqdb %f2, 0(%r1)
; S390X-NEXT: sqdbr %f0, %f0
; S390X-NEXT: br %r14
;
; SZ13: # %bb.0: # %entry
; SZ13-NEXT: larl %r1, .LCPI29_0
; SZ13-NEXT: vl %v0, 0(%r1)
-; SZ13-NEXT: vfsqdb %v24, %v0
+; SZ13-NEXT: vfsqdb %v26, %v0
; SZ13-NEXT: larl %r1, .LCPI29_1
; SZ13-NEXT: vl %v0, 0(%r1)
-; SZ13-NEXT: vfsqdb %v26, %v0
+; SZ13-NEXT: vfsqdb %v24, %v0
; SZ13-NEXT: br %r14
entry:
%sqrt = call <4 x double> @llvm.experimental.constrained.sqrt.v4f64(
; S390X-LABEL: constrained_vector_rint_v2f64:
; S390X: # %bb.0: # %entry
; S390X-NEXT: larl %r1, .LCPI76_0
-; S390X-NEXT: ld %f0, 0(%r1)
+; S390X-NEXT: ldeb %f0, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI76_1
-; S390X-NEXT: ldeb %f1, 0(%r1)
-; S390X-NEXT: fidbr %f0, 0, %f0
-; S390X-NEXT: fidbr %f2, 0, %f1
+; S390X-NEXT: ld %f1, 0(%r1)
+; S390X-NEXT: fidbr %f2, 0, %f0
+; S390X-NEXT: fidbr %f0, 0, %f1
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_rint_v2f64:
; S390X-NEXT: le %f1, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI77_2
; S390X-NEXT: le %f3, 0(%r1)
-; S390X-NEXT: fiebr %f0, 0, %f0
+; S390X-NEXT: fiebr %f4, 0, %f0
; S390X-NEXT: fiebr %f2, 0, %f1
-; S390X-NEXT: fiebr %f4, 0, %f3
+; S390X-NEXT: fiebr %f0, 0, %f3
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_rint_v3f32:
; S390X-NEXT: larl %r1, .LCPI79_1
; S390X-NEXT: ld %f1, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI79_2
-; S390X-NEXT: ld %f3, 0(%r1)
+; S390X-NEXT: ld %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI79_3
-; S390X-NEXT: ld %f5, 0(%r1)
-; S390X-NEXT: fidbr %f0, 0, %f0
-; S390X-NEXT: fidbr %f2, 0, %f1
-; S390X-NEXT: fidbr %f4, 0, %f3
-; S390X-NEXT: fidbr %f6, 0, %f5
+; S390X-NEXT: ld %f3, 0(%r1)
+; S390X-NEXT: fidbr %f6, 0, %f0
+; S390X-NEXT: fidbr %f4, 0, %f1
+; S390X-NEXT: fidbr %f2, 0, %f2
+; S390X-NEXT: fidbr %f0, 0, %f3
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_rint_v4f64:
; S390X-NEXT: ld %f0, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI96_1
; S390X-NEXT: ld %f1, 0(%r1)
-; S390X-NEXT: ledbr %f0, %f0
-; S390X-NEXT: ledbr %f2, %f1
+; S390X-NEXT: ledbr %f2, %f0
+; S390X-NEXT: ledbr %f0, %f1
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fptrunc_v2f64:
; S390X-NEXT: larl %r1, .LCPI98_1
; S390X-NEXT: ld %f1, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI98_2
-; S390X-NEXT: ld %f3, 0(%r1)
+; S390X-NEXT: ld %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI98_3
-; S390X-NEXT: ld %f5, 0(%r1)
-; S390X-NEXT: ledbr %f0, %f0
-; S390X-NEXT: ledbr %f2, %f1
-; S390X-NEXT: ledbr %f4, %f3
-; S390X-NEXT: ledbr %f6, %f5
+; S390X-NEXT: ld %f3, 0(%r1)
+; S390X-NEXT: ledbr %f6, %f0
+; S390X-NEXT: ledbr %f4, %f1
+; S390X-NEXT: ledbr %f2, %f2
+; S390X-NEXT: ledbr %f0, %f3
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fptrunc_v4f64:
; S390X-NEXT: le %f0, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI100_1
; S390X-NEXT: le %f1, 0(%r1)
-; S390X-NEXT: ldebr %f0, %f0
-; S390X-NEXT: ldebr %f2, %f1
+; S390X-NEXT: ldebr %f2, %f0
+; S390X-NEXT: ldebr %f0, %f1
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fpext_v2f32:
; S390X-NEXT: larl %r1, .LCPI102_1
; S390X-NEXT: le %f1, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI102_2
-; S390X-NEXT: le %f3, 0(%r1)
+; S390X-NEXT: le %f2, 0(%r1)
; S390X-NEXT: larl %r1, .LCPI102_3
-; S390X-NEXT: le %f5, 0(%r1)
-; S390X-NEXT: ldebr %f0, %f0
-; S390X-NEXT: ldebr %f2, %f1
-; S390X-NEXT: ldebr %f4, %f3
-; S390X-NEXT: ldebr %f6, %f5
+; S390X-NEXT: le %f3, 0(%r1)
+; S390X-NEXT: ldebr %f6, %f0
+; S390X-NEXT: ldebr %f4, %f1
+; S390X-NEXT: ldebr %f2, %f2
+; S390X-NEXT: ldebr %f0, %f3
; S390X-NEXT: br %r14
;
; SZ13-LABEL: constrained_vector_fpext_v4f32:
mayLoad_Unset = Unset;
mayStore = R->getValueAsBitOrUnset("mayStore", Unset);
mayStore_Unset = Unset;
+ mayRaiseFPException = R->getValueAsBit("mayRaiseFPException");
hasSideEffects = R->getValueAsBitOrUnset("hasSideEffects", Unset);
hasSideEffects_Unset = Unset;
bool mayLoad_Unset : 1;
bool mayStore : 1;
bool mayStore_Unset : 1;
+ bool mayRaiseFPException : 1;
bool isPredicable : 1;
bool isConvertibleToThreeAddress : 1;
bool isCommutable : 1;
if (Inst.canFoldAsLoad) OS << "|(1ULL<<MCID::FoldableAsLoad)";
if (Inst.mayLoad) OS << "|(1ULL<<MCID::MayLoad)";
if (Inst.mayStore) OS << "|(1ULL<<MCID::MayStore)";
+ if (Inst.mayRaiseFPException) OS << "|(1ULL<<MCID::MayRaiseFPException)";
if (Inst.isPredicable) OS << "|(1ULL<<MCID::Predicable)";
if (Inst.isConvertibleToThreeAddress) OS << "|(1ULL<<MCID::ConvertibleTo3Addr)";
if (Inst.isCommutable) OS << "|(1ULL<<MCID::Commutable)";
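
Note (illustration, not part of the patch): a rough sketch of how a backend would opt in to the new property. The instruction class, register class, and opcode below are invented for the example; the only point is that a target .td record sets the new bit, and the InstrInfoEmitter change above then ORs (1ULL<<MCID::MayRaiseFPException) into that opcode's flags.

    // Hypothetical target record -- "FPInst" and "FPR64" are assumed to be
    // defined by the target; only the mayRaiseFPException bit is the new part.
    let mayRaiseFPException = 1 in
    def FADD64rr : FPInst<(outs FPR64:$dst), (ins FPR64:$src1, FPR64:$src2),
                          "fadd\t$dst, $src1, $src2",
                          [(set FPR64:$dst, (fadd FPR64:$src1, FPR64:$src2))]>;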