From 3767f18861be3f8ad2b2f3e8caad022946debc6e Mon Sep 17 00:00:00 2001 From: Shiva Chen Date: Wed, 28 Aug 2019 23:40:37 +0000 Subject: [PATCH] [RISCV] Avoid generating AssertZext for LP64 ABI when lowering floating LibCall The patch fixed the issue that RV64 didn't clear the upper bits when return complex floating value with lp64 ABI. float _Complex complex_add(float _Complex a, float _Complex b) { return a + b; } RealResult = zero_extend(RealA + RealB) ImageResult = ImageA + ImageB Return (RealResult | (ImageResult << 32)) The patch introduces shouldExtendTypeInLibCall target hook to suppress the AssertZext generation when lowering floating LibCall. Thanks to Eli's comments from the Bugzilla https://bugs.llvm.org/show_bug.cgi?id=42820 Differential Revision: https://reviews.llvm.org/D65497 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@370275 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/TargetLowering.h | 18 +- .../SelectionDAG/LegalizeFloatTypes.cpp | 44 +- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 30 +- lib/Target/AArch64/AArch64ISelLowering.cpp | 6 +- lib/Target/ARM/ARMISelLowering.cpp | 4 +- lib/Target/RISCV/RISCVISelLowering.cpp | 10 + lib/Target/RISCV/RISCVISelLowering.h | 2 + test/CodeGen/RISCV/calling-conv-lp64.ll | 6 +- .../CodeGen/RISCV/rv32i-rv64i-float-double.ll | 6 +- test/CodeGen/RISCV/rv64i-complex-float.ll | 53 ++ test/CodeGen/RISCV/rv64i-single-softfloat.ll | 712 ++++++++++++++++++ 11 files changed, 868 insertions(+), 23 deletions(-) create mode 100644 test/CodeGen/RISCV/rv64i-complex-float.ll create mode 100644 test/CodeGen/RISCV/rv64i-single-softfloat.ll diff --git a/include/llvm/CodeGen/TargetLowering.h b/include/llvm/CodeGen/TargetLowering.h index 6b623e881ad..d27af295ed0 100644 --- a/include/llvm/CodeGen/TargetLowering.h +++ b/include/llvm/CodeGen/TargetLowering.h @@ -1814,6 +1814,11 @@ public: return IsSigned; } + /// Returns true if arguments should be extended in lib calls. + virtual bool shouldExtendTypeInLibCall(EVT Type) const { + return true; + } + /// Returns how the given (atomic) load should be expanded by the /// IR-level AtomicExpand pass. virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const { @@ -2987,7 +2992,8 @@ public: void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, - const SDLoc &DL) const; + const SDLoc &DL, const SDValue OldLHS, + const SDValue OldRHS) const; /// Returns a pair of (return value, chain). /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC. @@ -3533,6 +3539,11 @@ public: /// This structure is used to pass arguments to makeLibCall function. struct MakeLibCallOptions { + // By passing the node before soften to makeLibCall, the target hook + // shouldExtendTypeInLibCall can get the original type before soften. + // It could be generalized by passing orignal type lists if necessary + // in the future. + SDNode *NodeBeforeSoften = nullptr; bool IsSExt : 1; bool DoesNotReturn : 1; bool IsReturnValueUsed : 1; @@ -3561,6 +3572,11 @@ public: IsPostTypeLegalization = Value; return *this; } + + MakeLibCallOptions &setNodeBeforeSoften(SDNode *N) { + NodeBeforeSoften = N; + return *this; + } }; /// This function lowers an abstract call to a function into an actual call. diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index b71486f7114..9c99de35c89 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -201,6 +201,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) { SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMIN_F32, RTLIB::FMIN_F64, @@ -215,6 +216,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) { SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMAX_F32, RTLIB::FMAX_F64, @@ -229,6 +231,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::ADD_F32, RTLIB::ADD_F64, @@ -242,6 +245,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::CEIL_F32, RTLIB::CEIL_F64, @@ -306,6 +310,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::COS_F32, RTLIB::COS_F64, @@ -320,6 +325,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::DIV_F32, RTLIB::DIV_F64, @@ -333,6 +339,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::EXP_F32, RTLIB::EXP_F64, @@ -346,6 +353,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::EXP2_F32, RTLIB::EXP2_F64, @@ -359,6 +367,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, @@ -372,6 +381,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::LOG_F32, RTLIB::LOG_F64, @@ -385,6 +395,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::LOG2_F32, RTLIB::LOG2_F64, @@ -398,6 +409,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::LOG10_F32, RTLIB::LOG10_F64, @@ -413,6 +425,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { GetSoftenedFloat(N->getOperand(1)), GetSoftenedFloat(N->getOperand(2)) }; TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::FMA_F32, RTLIB::FMA_F64, @@ -427,6 +440,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::MUL_F32, RTLIB::MUL_F64, @@ -440,6 +454,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::NEARBYINT_F32, RTLIB::NEARBYINT_F64, @@ -502,6 +517,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; } @@ -511,6 +527,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) { EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32); SDValue Op = N->getOperand(0); TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, Op, CallOptions, SDLoc(N)).first; if (N->getValueType(0) == MVT::f32) @@ -534,6 +551,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first; } @@ -542,6 +560,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::POW_F32, RTLIB::POW_F64, @@ -557,6 +576,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) }; TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::POWI_F32, RTLIB::POWI_F64, @@ -571,6 +591,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::REM_F32, RTLIB::REM_F64, @@ -584,6 +605,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::RINT_F32, RTLIB::RINT_F64, @@ -597,6 +619,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::ROUND_F32, RTLIB::ROUND_F64, @@ -610,6 +633,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SIN_F32, RTLIB::SIN_F64, @@ -623,6 +647,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SQRT_F32, RTLIB::SQRT_F64, @@ -637,6 +662,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::SUB_F32, RTLIB::SUB_F64, @@ -653,6 +679,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, @@ -766,6 +793,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { NVT, N->getOperand(0)); TargetLowering::MakeLibCallOptions CallOptions; CallOptions.setSExt(Signed); + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), Op, CallOptions, dl).first; @@ -899,6 +927,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall"); TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first; } @@ -917,6 +946,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first; } @@ -927,7 +957,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), + N->getOperand(2), N->getOperand(3)); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -995,6 +1026,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl).first; // Truncate the result if the libcall returns a larger type. @@ -1019,7 +1051,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), + N->getOperand(0), N->getOperand(1)); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -1042,7 +1075,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N), + N->getOperand(0), N->getOperand(1)); // If softenSetCCOperands returned a scalar, use it. if (!NewRHS.getNode()) { @@ -1081,6 +1115,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LROUND_F32, RTLIB::LROUND_F64, @@ -1096,6 +1131,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LLROUND_F32, RTLIB::LLROUND_F64, @@ -1111,6 +1147,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LRINT_F32, RTLIB::LRINT_F64, @@ -1126,6 +1163,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) { SDValue Op = GetSoftenedFloat(N->getOperand(0)); EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy; TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(N); return TLI.makeLibCall(DAG, GetFPLibCall(RetVT, RTLIB::LLRINT_F32, RTLIB::LLRINT_F64, diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 8c322df862d..79caf880219 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -127,12 +127,19 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, Args.reserve(Ops.size()); TargetLowering::ArgListEntry Entry; - for (SDValue Op : Ops) { - Entry.Node = Op; + SDNode *N = CallOptions.NodeBeforeSoften; + for (unsigned i = 0; i < Ops.size(); ++i) { + SDValue NewOp = Ops[i]; + Entry.Node = NewOp; Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); - Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), + Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(), CallOptions.IsSExt); Entry.IsZExt = !Entry.IsSExt; + + SDValue OldOp = N ? N->getOperand(i) : NewOp; + if (!shouldExtendTypeInLibCall(OldOp.getValueType())) { + Entry.IsSExt = Entry.IsZExt = false; + } Args.push_back(Entry); } @@ -144,6 +151,13 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::CallLoweringInfo CLI(DAG); bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt); + bool zeroExtend = !signExtend; + + RetVT = N ? N->getValueType(0) : RetVT; + if (!shouldExtendTypeInLibCall(RetVT)) { + signExtend = zeroExtend = false; + } + CLI.setDebugLoc(dl) .setChain(DAG.getEntryNode()) .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args)) @@ -151,7 +165,7 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, .setDiscardResult(!CallOptions.IsReturnValueUsed) .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization) .setSExtResult(signExtend) - .setZExtResult(!signExtend); + .setZExtResult(zeroExtend); return LowerCallTo(CLI); } @@ -262,7 +276,8 @@ TargetLowering::findOptimalMemOpLowering(std::vector &MemOps, void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, - const SDLoc &dl) const { + const SDLoc &dl, const SDValue OldLHS, + const SDValue OldRHS) const { assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128) && "Unsupported setcc type!"); @@ -364,7 +379,12 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, // Use the target specific return value for comparions lib calls. EVT RetVT = getCmpLibcallReturnType(); SDValue Ops[2] = {NewLHS, NewRHS}; + SDValue OldSETCC = DAG.getNode( + ISD::SETCC, dl, + getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT), + OldLHS, OldRHS, DAG.getCondCode(CCCode)); TargetLowering::MakeLibCallOptions CallOptions; + CallOptions.setNodeBeforeSoften(OldSETCC.getNode()); NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl).first; NewRHS = DAG.getConstant(0, dl, RetVT); diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 9c4576799b4..febc1ffbc44 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -4555,7 +4555,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { // value of a libcall against zero, which is just what the rest of LowerBR_CC // is expecting to deal with. if (LHS.getValueType() == MVT::f128) { - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); + softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -4821,7 +4821,7 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { // Handle f128 first, since one possible outcome is a normal integer // comparison which gets picked up by the next if statement. if (LHS.getValueType() == MVT::f128) { - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); + softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS); // If softenSetCCOperands returned a scalar, use it. if (!RHS.getNode()) { @@ -4883,7 +4883,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS, // Handle f128 first, because it will result in a comparison of some RTLIB // call result against zero. if (LHS.getValueType() == MVT::f128) { - softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl); + softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index a8c8f212339..849a174694d 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -4818,7 +4818,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { if (isUnsupportedFloatingType(LHS.getValueType())) { DAG.getTargetLoweringInfo().softenSetCCOperands( - DAG, LHS.getValueType(), LHS, RHS, CC, dl); + DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS); // If softenSetCCOperands only returned one value, we should compare it to // zero. @@ -5062,7 +5062,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { if (isUnsupportedFloatingType(LHS.getValueType())) { DAG.getTargetLoweringInfo().softenSetCCOperands( - DAG, LHS.getValueType(), LHS, RHS, CC, dl); + DAG, LHS.getValueType(), LHS, RHS, CC, dl, LHS, RHS); // If softenSetCCOperands only returned one value, we should compare it to // zero. diff --git a/lib/Target/RISCV/RISCVISelLowering.cpp b/lib/Target/RISCV/RISCVISelLowering.cpp index 032074b89ea..6f5978fb3cb 100644 --- a/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/lib/Target/RISCV/RISCVISelLowering.cpp @@ -2777,3 +2777,13 @@ unsigned RISCVTargetLowering::getExceptionSelectorRegister( const Constant *PersonalityFn) const { return RISCV::X11; } + +bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const { + // Return false to suppress the unnecessary extensions if the LibCall + // arguments or return value is f32 type for LP64 ABI. + RISCVABI::ABI ABI = Subtarget.getTargetABI(); + if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32)) + return false; + + return true; +} diff --git a/lib/Target/RISCV/RISCVISelLowering.h b/lib/Target/RISCV/RISCVISelLowering.h index e2059e70831..18fc7350bbb 100644 --- a/lib/Target/RISCV/RISCVISelLowering.h +++ b/lib/Target/RISCV/RISCVISelLowering.h @@ -145,6 +145,8 @@ public: unsigned getExceptionSelectorRegister(const Constant *PersonalityFn) const override; + bool shouldExtendTypeInLibCall(EVT Type) const override; + private: void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo, const SmallVectorImpl &Ins, diff --git a/test/CodeGen/RISCV/calling-conv-lp64.ll b/test/CodeGen/RISCV/calling-conv-lp64.ll index faa68185a9f..3e6941c6c2a 100644 --- a/test/CodeGen/RISCV/calling-conv-lp64.ll +++ b/test/CodeGen/RISCV/calling-conv-lp64.ll @@ -21,8 +21,7 @@ define i64 @callee_float_in_regs(i64 %a, float %b) nounwind { ; RV64I-FPELIM-NEXT: sd ra, 8(sp) ; RV64I-FPELIM-NEXT: sd s0, 0(sp) ; RV64I-FPELIM-NEXT: mv s0, a0 -; RV64I-FPELIM-NEXT: slli a0, a1, 32 -; RV64I-FPELIM-NEXT: srli a0, a0, 32 +; RV64I-FPELIM-NEXT: mv a0, a1 ; RV64I-FPELIM-NEXT: call __fixsfdi ; RV64I-FPELIM-NEXT: add a0, s0, a0 ; RV64I-FPELIM-NEXT: ld s0, 0(sp) @@ -38,8 +37,7 @@ define i64 @callee_float_in_regs(i64 %a, float %b) nounwind { ; RV64I-WITHFP-NEXT: sd s1, 8(sp) ; RV64I-WITHFP-NEXT: addi s0, sp, 32 ; RV64I-WITHFP-NEXT: mv s1, a0 -; RV64I-WITHFP-NEXT: slli a0, a1, 32 -; RV64I-WITHFP-NEXT: srli a0, a0, 32 +; RV64I-WITHFP-NEXT: mv a0, a1 ; RV64I-WITHFP-NEXT: call __fixsfdi ; RV64I-WITHFP-NEXT: add a0, s1, a0 ; RV64I-WITHFP-NEXT: ld s1, 8(sp) diff --git a/test/CodeGen/RISCV/rv32i-rv64i-float-double.ll b/test/CodeGen/RISCV/rv32i-rv64i-float-double.ll index 7bb4e28facb..11e7568eba3 100644 --- a/test/CodeGen/RISCV/rv32i-rv64i-float-double.ll +++ b/test/CodeGen/RISCV/rv32i-rv64i-float-double.ll @@ -31,11 +31,7 @@ define float @float_test(float %a, float %b) nounwind { ; RV64IF-NEXT: addi sp, sp, -16 ; RV64IF-NEXT: sd ra, 8(sp) ; RV64IF-NEXT: sd s0, 0(sp) -; RV64IF-NEXT: slli a0, a0, 32 -; RV64IF-NEXT: srli a0, a0, 32 -; RV64IF-NEXT: slli a1, a1, 32 -; RV64IF-NEXT: srli s0, a1, 32 -; RV64IF-NEXT: mv a1, s0 +; RV64IF-NEXT: mv s0, a1 ; RV64IF-NEXT: call __addsf3 ; RV64IF-NEXT: mv a1, s0 ; RV64IF-NEXT: call __divsf3 diff --git a/test/CodeGen/RISCV/rv64i-complex-float.ll b/test/CodeGen/RISCV/rv64i-complex-float.ll new file mode 100644 index 00000000000..89e3f414e58 --- /dev/null +++ b/test/CodeGen/RISCV/rv64i-complex-float.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s + +; The complex floating value will be returned by a single register for LP64 ABI. +; The test case check that the real part returned by __addsf3 will be +; cleared upper bits by shifts to avoid corrupting the imaginary part. + +define i64 @complex_float_add(i64 %a.coerce, i64 %b.coerce) nounwind { +; CHECK-LABEL: complex_float_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd ra, 24(sp) +; CHECK-NEXT: sd s0, 16(sp) +; CHECK-NEXT: sd s1, 8(sp) +; CHECK-NEXT: sd s2, 0(sp) +; CHECK-NEXT: mv s0, a1 +; CHECK-NEXT: mv s1, a0 +; CHECK-NEXT: call __addsf3 +; CHECK-NEXT: mv s2, a0 +; CHECK-NEXT: srli a0, s1, 32 +; CHECK-NEXT: srli a1, s0, 32 +; CHECK-NEXT: call __addsf3 +; CHECK-NEXT: slli a1, s2, 32 +; CHECK-NEXT: srli a1, a1, 32 +; CHECK-NEXT: slli a0, a0, 32 +; CHECK-NEXT: or a0, a0, a1 +; CHECK-NEXT: ld s2, 0(sp) +; CHECK-NEXT: ld s1, 8(sp) +; CHECK-NEXT: ld s0, 16(sp) +; CHECK-NEXT: ld ra, 24(sp) +; CHECK-NEXT: addi sp, sp, 32 +; CHECK-NEXT: ret +entry: + %a.sroa.0.0.extract.trunc = trunc i64 %a.coerce to i32 + %0 = bitcast i32 %a.sroa.0.0.extract.trunc to float + %a.sroa.2.0.extract.shift = lshr i64 %a.coerce, 32 + %a.sroa.2.0.extract.trunc = trunc i64 %a.sroa.2.0.extract.shift to i32 + %1 = bitcast i32 %a.sroa.2.0.extract.trunc to float + %b.sroa.0.0.extract.trunc = trunc i64 %b.coerce to i32 + %2 = bitcast i32 %b.sroa.0.0.extract.trunc to float + %b.sroa.2.0.extract.shift = lshr i64 %b.coerce, 32 + %b.sroa.2.0.extract.trunc = trunc i64 %b.sroa.2.0.extract.shift to i32 + %3 = bitcast i32 %b.sroa.2.0.extract.trunc to float + %add.r = fadd float %0, %2 + %add.i = fadd float %1, %3 + %4 = bitcast float %add.r to i32 + %5 = bitcast float %add.i to i32 + %retval.sroa.2.0.insert.ext = zext i32 %5 to i64 + %retval.sroa.2.0.insert.shift = shl nuw i64 %retval.sroa.2.0.insert.ext, 32 + %retval.sroa.0.0.insert.ext = zext i32 %4 to i64 + %retval.sroa.0.0.insert.insert = or i64 %retval.sroa.2.0.insert.shift, %retval.sroa.0.0.insert.ext + ret i64 %retval.sroa.0.0.insert.insert +} diff --git a/test/CodeGen/RISCV/rv64i-single-softfloat.ll b/test/CodeGen/RISCV/rv64i-single-softfloat.ll new file mode 100644 index 00000000000..e486a48cb22 --- /dev/null +++ b/test/CodeGen/RISCV/rv64i-single-softfloat.ll @@ -0,0 +1,712 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I %s + +; The test cases check that the single float arguments won't be extended +; when passing to softfloat functions. +; RISCV backend using shouldExtendTypeInLibCall target hook to suppress +; the extension generation. + +define float @fadd_s(float %a, float %b) nounwind { +; RV64I-LABEL: fadd_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fadd float %a, %b + ret float %1 +} + +define float @fsub_s(float %a, float %b) nounwind { +; RV64I-LABEL: fsub_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call __subsf3 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fsub float %a, %b + ret float %1 +} + +define float @fmul_s(float %a, float %b) nounwind { +; RV64I-LABEL: fmul_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call __mulsf3 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fmul float %a, %b + ret float %1 +} + +define float @fdiv_s(float %a, float %b) nounwind { +; RV64I-LABEL: fdiv_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call __divsf3 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fdiv float %a, %b + ret float %1 +} + +define i32 @feq_s(float %a, float %b) nounwind { +; RV64I-LABEL: feq_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call __eqsf2 +; RV64I-NEXT: seqz a0, a0 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fcmp oeq float %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @flt_s(float %a, float %b) nounwind { +; RV64I-LABEL: flt_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call __ltsf2 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: slti a0, a0, 0 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fcmp olt float %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fle_s(float %a, float %b) nounwind { +; RV64I-LABEL: fle_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call __lesf2 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: slti a0, a0, 1 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fcmp ole float %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ogt(float %a, float %b) nounwind { +; RV64I-LABEL: fcmp_ogt: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call __gtsf2 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: sgtz a0, a0 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fcmp ogt float %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_oge(float %a, float %b) nounwind { +; RV64I-LABEL: fcmp_oge: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call __gesf2 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: addi a1, zero, -1 +; RV64I-NEXT: slt a0, a1, a0 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fcmp oge float %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_ord(float %a, float %b) nounwind { +; RV64I-LABEL: fcmp_ord: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call __unordsf2 +; RV64I-NEXT: seqz a0, a0 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fcmp ord float %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcmp_une(float %a, float %b) nounwind { +; RV64I-LABEL: fcmp_une: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call __nesf2 +; RV64I-NEXT: snez a0, a0 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fcmp une float %a, %b + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +define i32 @fcvt_w_s(float %a) nounwind { +; RV64I-LABEL: fcvt_w_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call __fixsfdi +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fptosi float %a to i32 + ret i32 %1 +} + +define i32 @fcvt_wu_s(float %a) nounwind { +; RV64I-LABEL: fcvt_wu_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call __fixunssfdi +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fptoui float %a to i32 + ret i32 %1 +} + +define float @fcvt_s_w(i32 %a) nounwind { +; RV64I-LABEL: fcvt_s_w: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: call __floatsisf +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = sitofp i32 %a to float + ret float %1 +} + +define float @fcvt_s_wu(i32 %a) nounwind { +; RV64I-LABEL: fcvt_s_wu: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: call __floatunsisf +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = uitofp i32 %a to float + ret float %1 +} + +define i64 @fcvt_l_s(float %a) nounwind { +; RV64I-LABEL: fcvt_l_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call __fixsfdi +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fptosi float %a to i64 + ret i64 %1 +} + +define i64 @fcvt_lu_s(float %a) nounwind { +; RV64I-LABEL: fcvt_lu_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call __fixunssfdi +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = fptoui float %a to i64 + ret i64 %1 +} + +define float @fcvt_s_l(i64 %a) nounwind { +; RV64I-LABEL: fcvt_s_l: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call __floatdisf +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = sitofp i64 %a to float + ret float %1 +} + +define float @fcvt_s_lu(i64 %a) nounwind { +; RV64I-LABEL: fcvt_s_lu: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call __floatundisf +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = uitofp i64 %a to float + ret float %1 +} + +declare float @llvm.sqrt.f32(float) + +define float @fsqrt_s(float %a) nounwind { +; RV64I-LABEL: fsqrt_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call sqrtf +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.sqrt.f32(float %a) + ret float %1 +} + +declare float @llvm.copysign.f32(float, float) + +define float @fsgnj_s(float %a, float %b) nounwind { +; RV64I-LABEL: fsgnj_s: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a2, 524288 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: addiw a2, a2, -1 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ret + %1 = call float @llvm.copysign.f32(float %a, float %b) + ret float %1 +} + +declare float @llvm.minnum.f32(float, float) + +define float @fmin_s(float %a, float %b) nounwind { +; RV64I-LABEL: fmin_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call fminf +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.minnum.f32(float %a, float %b) + ret float %1 +} + +declare float @llvm.maxnum.f32(float, float) + +define float @fmax_s(float %a, float %b) nounwind { +; RV64I-LABEL: fmax_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call fmaxf +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.maxnum.f32(float %a, float %b) + ret float %1 +} + + +declare float @llvm.fma.f32(float, float, float) + +define float @fmadd_s(float %a, float %b, float %c) nounwind { +; RV64I-LABEL: fmadd_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call fmaf +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.fma.f32(float %a, float %b, float %c) + ret float %1 +} + +define float @fmsub_s(float %a, float %b, float %c) nounwind { +; RV64I-LABEL: fmsub_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) +; RV64I-NEXT: sd s0, 16(sp) +; RV64I-NEXT: sd s1, 8(sp) +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: mv a1, zero +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: xor a2, a0, a1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a1, s0 +; RV64I-NEXT: call fmaf +; RV64I-NEXT: ld s1, 8(sp) +; RV64I-NEXT: ld s0, 16(sp) +; RV64I-NEXT: ld ra, 24(sp) +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %c_ = fadd float 0.0, %c ; avoid negation using xor + %negc = fsub float -0.0, %c_ + %1 = call float @llvm.fma.f32(float %a, float %b, float %negc) + ret float %1 +} + +define float @fnmadd_s(float %a, float %b, float %c) nounwind { +; RV64I-LABEL: fnmadd_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) +; RV64I-NEXT: sd s0, 16(sp) +; RV64I-NEXT: sd s1, 8(sp) +; RV64I-NEXT: sd s2, 0(sp) +; RV64I-NEXT: mv s0, a2 +; RV64I-NEXT: mv s2, a1 +; RV64I-NEXT: mv a1, zero +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a1, zero +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: lui a2, 524288 +; RV64I-NEXT: xor a1, s1, a2 +; RV64I-NEXT: xor a2, a0, a2 +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: mv a1, s2 +; RV64I-NEXT: call fmaf +; RV64I-NEXT: ld s2, 0(sp) +; RV64I-NEXT: ld s1, 8(sp) +; RV64I-NEXT: ld s0, 16(sp) +; RV64I-NEXT: ld ra, 24(sp) +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %a_ = fadd float 0.0, %a + %c_ = fadd float 0.0, %c + %nega = fsub float -0.0, %a_ + %negc = fsub float -0.0, %c_ + %1 = call float @llvm.fma.f32(float %nega, float %b, float %negc) + ret float %1 +} + +define float @fnmsub_s(float %a, float %b, float %c) nounwind { +; RV64I-LABEL: fnmsub_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: sd ra, 24(sp) +; RV64I-NEXT: sd s0, 16(sp) +; RV64I-NEXT: sd s1, 8(sp) +; RV64I-NEXT: mv s0, a2 +; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv a1, zero +; RV64I-NEXT: call __addsf3 +; RV64I-NEXT: lui a1, 524288 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: call fmaf +; RV64I-NEXT: ld s1, 8(sp) +; RV64I-NEXT: ld s0, 16(sp) +; RV64I-NEXT: ld ra, 24(sp) +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: ret + %a_ = fadd float 0.0, %a + %nega = fsub float -0.0, %a_ + %1 = call float @llvm.fma.f32(float %nega, float %b, float %c) + ret float %1 +} + +declare float @llvm.ceil.f32(float) + +define float @fceil_s(float %a) nounwind { +; RV64I-LABEL: fceil_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call ceilf +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.ceil.f32(float %a) + ret float %1 +} + +declare float @llvm.cos.f32(float) + +define float @fcos_s(float %a) nounwind { +; RV64I-LABEL: fcos_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call cosf +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.cos.f32(float %a) + ret float %1 +} + +declare float @llvm.sin.f32(float) + +define float @fsin_s(float %a) nounwind { +; RV64I-LABEL: fsin_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call sinf +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.sin.f32(float %a) + ret float %1 +} + +declare float @llvm.exp.f32(float) + +define float @fexp_s(float %a) nounwind { +; RV64I-LABEL: fexp_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call expf +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.exp.f32(float %a) + ret float %1 +} + +declare float @llvm.exp2.f32(float) + +define float @fexp2_s(float %a) nounwind { +; RV64I-LABEL: fexp2_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call exp2f +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.exp2.f32(float %a) + ret float %1 +} + +declare float @llvm.floor.f32(float) + +define float @ffloor_s(float %a) nounwind { +; RV64I-LABEL: ffloor_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call floorf +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.floor.f32(float %a) + ret float %1 +} + +declare float @llvm.flog.f32(float) + +define float @fflog_s(float %a) nounwind { +; RV64I-LABEL: fflog_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call llvm.flog.f32 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.flog.f32(float %a) + ret float %1 +} + +declare float @llvm.flog2.f32(float) + +define float @fflog2_s(float %a) nounwind { +; RV64I-LABEL: fflog2_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call llvm.flog2.f32 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.flog2.f32(float %a) + ret float %1 +} + +declare float @llvm.flog10.f32(float) + +define float @fflog10_s(float %a) nounwind { +; RV64I-LABEL: fflog10_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call llvm.flog10.f32 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.flog10.f32(float %a) + ret float %1 +} + +declare float @llvm.fnearbyint.f32(float) + +define float @fnearbyint_s(float %a) nounwind { +; RV64I-LABEL: fnearbyint_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call llvm.fnearbyint.f32 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.fnearbyint.f32(float %a) + ret float %1 +} + +declare float @llvm.round.f32(float) + +define float @fround_s(float %a) nounwind { +; RV64I-LABEL: fround_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call roundf +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.round.f32(float %a) + ret float %1 +} + +declare float @llvm.fpround.f32(float) + +define float @fpround_s(float %a) nounwind { +; RV64I-LABEL: fpround_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call llvm.fpround.f32 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.fpround.f32(float %a) + ret float %1 +} + +declare float @llvm.rint.f32(float) + +define float @frint_s(float %a) nounwind { +; RV64I-LABEL: frint_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call rintf +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.rint.f32(float %a) + ret float %1 +} + +declare float @llvm.rem.f32(float) + +define float @frem_s(float %a) nounwind { +; RV64I-LABEL: frem_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call llvm.rem.f32 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.rem.f32(float %a) + ret float %1 +} + +declare float @llvm.pow.f32(float %Val, float %power) + +define float @fpow_s(float %a, float %b) nounwind { +; RV64I-LABEL: fpow_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call powf +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.pow.f32(float %a, float %b) + ret float %1 +} + +declare float @llvm.powi.f32(float %Val, i32 %power) + +define float @fpowi_s(float %a, i32 %b) nounwind { +; RV64I-LABEL: fpowi_s: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: call __powisf2 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = call float @llvm.powi.f32(float %a, i32 %b) + ret float %1 +} + +define double @fp_ext(float %a) nounwind { +; RV64I-LABEL: fp_ext: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call __extendsfdf2 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %conv = fpext float %a to double + ret double %conv +} + +define float @fp_trunc(double %a) nounwind { +; RV64I-LABEL: fp_trunc: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: call __truncdfsf2 +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %conv = fptrunc double %a to float + ret float %conv +} -- 2.40.0