class MCSymbol;
template<typename T> class SmallVectorImpl;
class DataLayout;
- struct TargetRecip;
class TargetRegisterClass;
class TargetLibraryInfo;
class TargetLoweringObjectFile;
return false;
}
+ /// Reciprocal estimate status values used by the functions below.
+ enum ReciprocalEstimate : int {
+ Unspecified = -1, ///< No override from the function's attributes; target defaults apply.
+ Disabled = 0, ///< Estimates are explicitly turned off for the operation.
+ Enabled = 1 ///< Estimates are explicitly turned on for the operation.
+ };
+
+ /// Return a ReciprocalEstimate enum value for a square root of the given type
+ /// based on the function's "reciprocal-estimates" attribute. If the attribute
+ /// does not override this operation, "Unspecified" is returned and target
+ /// defaults are expected to be used for instruction selection.
+ int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const;
+
+ /// Return a ReciprocalEstimate enum value for a division of the given type
+ /// based on the function's "reciprocal-estimates" attribute. If the attribute
+ /// does not override this operation, "Unspecified" is returned and target
+ /// defaults are expected to be used for instruction selection.
+ int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const;
+
+ /// Return the refinement step count for a square root of the given type based
+ /// on the function's "reciprocal-estimates" attribute (a single digit, 0-9).
+ /// If the attribute sets no step count for this operation, "Unspecified" (-1)
+ /// is returned and target defaults are expected to be used.
+ int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const;
+
+ /// Return the refinement step count for a division of the given type based
+ /// on the function's "reciprocal-estimates" attribute (a single digit, 0-9).
+ /// If the attribute sets no step count for this operation, "Unspecified" (-1)
+ /// is returned and target defaults are expected to be used.
+ int getDivRefinementSteps(EVT VT, MachineFunction &MF) const;
+
/// Returns true if target has indicated at least one type should be bypassed.
bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); }
}
}
- /// Return the reciprocal estimate code generation preferences for this target
- /// after potentially overriding settings using the function's attributes.
- /// FIXME: Like all unsafe-math target settings, this should really be an
- /// instruction-level attribute/metadata/FMF.
- TargetRecip getTargetRecipForFunc(MachineFunction &MF) const;
-
/// Vector types are broken down into some number of legal first class types.
/// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8
/// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64
/// sequence of memory operands that is recognized by PrologEpilogInserter.
MachineBasicBlock *emitPatchPoint(MachineInstr &MI,
MachineBasicBlock *MBB) const;
- TargetRecip ReciprocalEstimates;
};
/// This class defines information used to lower LLVM code to legal SelectionDAG
/// roots.
/// Return a reciprocal square root estimate value for the input operand.
- /// The RefinementSteps output is the number of Newton-Raphson refinement
- /// iterations required to generate a sufficient (though not necessarily
- /// IEEE-754 compliant) estimate for the value type.
+ /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or
+ /// 'Enabled' as set by a potential default override attribute.
+ /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson
+ /// refinement iterations required to generate a sufficient (though not
+ /// necessarily IEEE-754 compliant) estimate is returned in that parameter.
/// The boolean UseOneConstNR output is used to select a Newton-Raphson
/// algorithm implementation that uses one constant or two constants.
/// A target may choose to implement its own refinement within this function.
/// If that's true, then return '0' as the number of RefinementSteps to avoid
/// any further refinement of the estimate.
/// An empty SDValue return means no estimate sequence can be created.
- virtual SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
+ virtual SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG,
+ int Enabled, int &RefinementSteps,
bool &UseOneConstNR) const {
return SDValue();
}
/// Return a reciprocal estimate value for the input operand.
- /// The RefinementSteps output is the number of Newton-Raphson refinement
- /// iterations required to generate a sufficient (though not necessarily
- /// IEEE-754 compliant) estimate for the value type.
+ /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or
+ /// 'Enabled' as set by a potential default override attribute.
+ /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson
+ /// refinement iterations required to generate a sufficient (though not
+ /// necessarily IEEE-754 compliant) estimate is returned in that parameter.
/// A target may choose to implement its own refinement within this function.
/// If that's true, then return '0' as the number of RefinementSteps to avoid
/// any further refinement of the estimate.
/// An empty SDValue return means no estimate sequence can be created.
- virtual SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const {
+ virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
+ int Enabled, int &RefinementSteps) const {
return SDValue();
}
#ifndef LLVM_TARGET_TARGETOPTIONS_H
#define LLVM_TARGET_TARGETOPTIONS_H
-#include "llvm/Target/TargetRecip.h"
#include "llvm/MC/MCTargetOptions.h"
namespace llvm {
+++ /dev/null
-//===--------------------- llvm/Target/TargetRecip.h ------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class is used to customize machine-specific reciprocal estimate code
-// generation in a target-independent way.
-// If a target does not support operations in this specification, then code
-// generation will default to using supported operations.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_TARGETRECIP_H
-#define LLVM_TARGET_TARGETRECIP_H
-
-#include <cstdint>
-#include <map>
-#include <string>
-#include <vector>
-
-namespace llvm {
-
-class StringRef;
-
-struct TargetRecip {
-public:
- TargetRecip();
-
- /// Parse a comma-separated string of reciprocal settings to set values in
- /// this struct.
- void set(StringRef &Args);
-
- /// Set enablement and refinement steps for a particular reciprocal operation.
- /// Use "all" to give all operations the same values.
- void set(StringRef Key, bool Enable, unsigned RefSteps);
-
- /// Return true if the reciprocal operation has been enabled.
- bool isEnabled(StringRef Key) const;
-
- /// Return the number of iterations necessary to refine the
- /// the result of a machine instruction for the given reciprocal operation.
- unsigned getRefinementSteps(StringRef Key) const;
-
- bool operator==(const TargetRecip &Other) const;
-
-private:
- // TODO: We should be able to use special values (enums) to simplify this into
- // just an int, but we have to be careful because the user is allowed to
- // specify "default" as a setting and just change the refinement step count.
- struct RecipParams {
- bool Enabled;
- int8_t RefinementSteps;
-
- RecipParams() : Enabled(false), RefinementSteps(0) {}
- };
-
- std::map<StringRef, RecipParams> RecipMap;
- typedef std::map<StringRef, RecipParams>::iterator RecipIter;
- typedef std::map<StringRef, RecipParams>::const_iterator ConstRecipIter;
-
- bool parseGlobalParams(const std::string &Arg);
- void parseIndividualParams(const std::vector<std::string> &Args);
-};
-
-} // end namespace llvm
-
-#endif // LLVM_TARGET_TARGETRECIP_H
if (Level >= AfterLegalizeDAG)
return SDValue();
- // Expose the DAG combiner to the target combiner implementations.
- TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
+ // TODO: Handle half and/or extended types?
+ EVT VT = Op.getValueType();
+ if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
+ return SDValue();
- unsigned Iterations = 0;
- if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) {
+ // If estimates are explicitly disabled for this function, we're done.
+ MachineFunction &MF = DAG.getMachineFunction();
+ int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
+ if (Enabled == TLI.ReciprocalEstimate::Disabled)
+ return SDValue();
+
+ // Estimates may be explicitly enabled for this type with a custom number of
+ // refinement steps.
+ int Iterations = TLI.getDivRefinementSteps(VT, MF);
+ if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
if (Iterations) {
// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
// For the reciprocal, we need to find the zero of the function:
AddToWorklist(Est.getNode());
// Newton iterations: Est = Est + Est (1 - Arg * Est)
- for (unsigned i = 0; i < Iterations; ++i) {
+ for (int i = 0; i < Iterations; ++i) {
SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
AddToWorklist(NewEst.getNode());
if (Level >= AfterLegalizeDAG)
return SDValue();
- // Expose the DAG combiner to the target combiner implementations.
- TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
- unsigned Iterations = 0;
+ // TODO: Handle half and/or extended types?
+ EVT VT = Op.getValueType();
+ if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
+ return SDValue();
+
+ // If estimates are explicitly disabled for this function, we're done.
+ MachineFunction &MF = DAG.getMachineFunction();
+ int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
+ if (Enabled == TLI.ReciprocalEstimate::Disabled)
+ return SDValue();
+
+ // Estimates may be explicitly enabled for this type with a custom number of
+ // refinement steps.
+ int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
+
bool UseOneConstNR = false;
- if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) {
+ if (SDValue Est =
+ TLI.getRsqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR)) {
AddToWorklist(Est.getNode());
if (Iterations) {
Est = UseOneConstNR
- ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
- : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
+ ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
+ : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
}
return Est;
}
#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
InitLibcallNames(LibcallRoutineNames, TM.getTargetTriple());
InitCmpLibcallCCs(CmpLibcallCCs);
InitLibcallCallingConvs(LibcallCallingConvs);
- ReciprocalEstimates.set("all", false, 0);
}
void TargetLoweringBase::initActions() {
return MVT::i32; // return the default value
}
-TargetRecip
-TargetLoweringBase::getTargetRecipForFunc(MachineFunction &MF) const {
- const Function *F = MF.getFunction();
- StringRef RecipAttrName = "reciprocal-estimates";
- if (!F->hasFnAttribute(RecipAttrName))
- return ReciprocalEstimates;
-
- // Make a copy of the target's default reciprocal codegen settings.
- TargetRecip Recips = ReciprocalEstimates;
-
- // Override any settings that are customized for this function.
- StringRef RecipString = F->getFnAttribute(RecipAttrName).getValueAsString();
- Recips.set(RecipString);
- return Recips;
-}
-
/// getVectorTypeBreakdown - Vector types are broken down into some number of
/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32
/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack.
void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) {
MaximumJumpTableSize = Val;
}
+
+//===----------------------------------------------------------------------===//
+// Reciprocal Estimates
+//===----------------------------------------------------------------------===//
+
+/// Fetch the value of the function's "reciprocal-estimates" attribute, which
+/// overrides the target defaults. The result is an empty StringRef when the
+/// function does not carry the attribute.
+static StringRef getRecipEstimateForFunc(MachineFunction &MF) {
+  const StringRef AttrName = "reciprocal-estimates";
+  const Function *Fn = MF.getFunction();
+  return Fn->hasFnAttribute(AttrName)
+             ? Fn->getFnAttribute(AttrName).getValueAsString()
+             : StringRef();
+}
+
+/// Build the option name for a reciprocal operation on the given type, in the
+/// spelling used by the front-end's "-mrecip" flag (assuming those strings are
+/// forwarded unchanged in the attribute string). The name is assembled as
+/// ["vec-"] + ("sqrt" | "div") + ("d" | "f"); for example, a division of a
+/// vXf32 yields "vec-divf".
+static std::string getReciprocalOpName(bool IsSqrt, EVT VT) {
+  // TODO: Handle "half" or other float types?
+  const EVT ScalarVT = VT.getScalarType();
+  const bool IsDouble = ScalarVT == MVT::f64;
+  assert((IsDouble || ScalarVT == MVT::f32) &&
+         "Unexpected FP type for reciprocal estimate");
+
+  std::string OpName;
+  if (VT.isVector())
+    OpName = "vec-";
+  OpName.append(IsSqrt ? "sqrt" : "div");
+  OpName.append(IsDouble ? "d" : "f");
+  return OpName;
+}
+
+/// Look for a customized refinement step count (":N") in \p In.
+///
+/// \p Position always receives the index of the ':' token (StringRef::npos if
+/// there is none). When a token is present, exactly one decimal digit must
+/// follow it; that digit is stored in \p Value and true is returned. Any other
+/// text after the ':' is a fatal error. Returns false, leaving \p Value
+/// untouched, when \p In has no ':' at all.
+static bool parseRefinementStep(StringRef In, size_t &Position,
+                                uint8_t &Value) {
+  Position = In.find(':');
+  if (Position == StringRef::npos)
+    return false;
+
+  // Everything after the token must be a single character in '0'..'9'.
+  StringRef Steps = In.substr(Position + 1);
+  const bool IsSingleDigit =
+      Steps.size() == 1 && Steps[0] >= '0' && Steps[0] <= '9';
+  if (!IsSingleDigit)
+    report_fatal_error("Invalid refinement step for -recip.");
+
+  Value = Steps[0] - '0';
+  return true;
+}
+
+/// For the input attribute string, return one of the ReciprocalEstimate enum
+/// status values (enabled, disabled, or not specified) for this operation on
+/// the specified data type.
+///
+/// \p Override is a comma-separated list. A lone "all", "none" or "default"
+/// entry applies to every operation. Otherwise each entry names one operation
+/// as produced by getReciprocalOpName() (the trailing 'f'/'d' may be omitted),
+/// optionally prefixed with '!' to disable it. Any entry may carry a ":N"
+/// refinement step suffix, which is validated but otherwise ignored here.
+/// The first entry that matches this operation decides the result.
+static int getOpEnabled(bool IsSqrt, EVT VT, StringRef Override) {
+  if (Override.empty())
+    return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+
+  SmallVector<StringRef, 4> OverrideVector;
+  SplitString(Override, OverrideVector, ",");
+  unsigned NumArgs = OverrideVector.size();
+
+  // Check if "all", "none", or "default" was specified.
+  if (NumArgs == 1) {
+    // Look for an optional setting of the number of refinement steps needed
+    // for this type of reciprocal operation.
+    size_t RefPos;
+    uint8_t RefSteps;
+    if (parseRefinementStep(Override, RefPos, RefSteps)) {
+      // Split the string for further processing.
+      Override = Override.substr(0, RefPos);
+    }
+
+    // All reciprocal types are enabled.
+    if (Override == "all")
+      return TargetLoweringBase::ReciprocalEstimate::Enabled;
+
+    // All reciprocal types are disabled.
+    if (Override == "none")
+      return TargetLoweringBase::ReciprocalEstimate::Disabled;
+
+    // Target defaults for enablement are used.
+    if (Override == "default")
+      return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+  }
+
+  // The attribute string may omit the size suffix ('f'/'d'), so match against
+  // both the full name and the name with its last character removed.
+  std::string VTName = getReciprocalOpName(IsSqrt, VT);
+  std::string VTNameNoSize = VTName;
+  VTNameNoSize.pop_back();
+  static const char DisabledPrefix = '!';
+
+  for (StringRef RecipType : OverrideVector) {
+    size_t RefPos;
+    uint8_t RefSteps;
+    if (parseRefinementStep(RecipType, RefPos, RefSteps))
+      RecipType = RecipType.substr(0, RefPos);
+
+    // Stripping the ":N" suffix can leave nothing behind (e.g. the entry was
+    // just ":3"). Such an entry names no operation, and indexing it below
+    // would read out of bounds, so skip it.
+    if (RecipType.empty())
+      continue;
+
+    // Ignore the disablement token for string matching.
+    bool IsDisabled = RecipType[0] == DisabledPrefix;
+    if (IsDisabled)
+      RecipType = RecipType.substr(1);
+
+    if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize))
+      return IsDisabled ? TargetLoweringBase::ReciprocalEstimate::Disabled
+                        : TargetLoweringBase::ReciprocalEstimate::Enabled;
+  }
+
+  return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+}
+
+/// For the input attribute string, return the customized refinement step count
+/// for this operation on the specified data type. If the step count does not
+/// exist, return the ReciprocalEstimate enum value for unspecified.
+///
+/// \p Override is a comma-separated list. A lone "all:N" or "default:N" entry
+/// sets the count for every operation. Otherwise the first entry of the form
+/// "<op>:N" whose <op> matches getReciprocalOpName() (with or without the
+/// trailing 'f'/'d') supplies the count. Entries without a ":N" suffix are
+/// skipped; a leading '!' is not stripped here, so a disabled entry never
+/// matches.
+static int getOpRefinementSteps(bool IsSqrt, EVT VT, StringRef Override) {
+  if (Override.empty())
+    return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+
+  SmallVector<StringRef, 4> OverrideVector;
+  SplitString(Override, OverrideVector, ",");
+  unsigned NumArgs = OverrideVector.size();
+
+  // Check if "all", "default", or "none" was specified.
+  if (NumArgs == 1) {
+    // Look for an optional setting of the number of refinement steps needed
+    // for this type of reciprocal operation.
+    size_t RefPos;
+    uint8_t RefSteps;
+    if (!parseRefinementStep(Override, RefPos, RefSteps))
+      return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+
+    // Split the string for further processing.
+    Override = Override.substr(0, RefPos);
+    assert(Override != "none" &&
+           "Disabled reciprocals, but specifed refinement steps?");
+
+    // If this is a general override, return the specified number of steps.
+    if (Override == "all" || Override == "default")
+      return RefSteps;
+  }
+
+  // The attribute string may omit the size suffix ('f'/'d'), so match against
+  // both the full name and the name with its last character removed.
+  std::string VTName = getReciprocalOpName(IsSqrt, VT);
+  std::string VTNameNoSize = VTName;
+  VTNameNoSize.pop_back();
+
+  for (StringRef RecipType : OverrideVector) {
+    size_t RefPos;
+    uint8_t RefSteps;
+    if (!parseRefinementStep(RecipType, RefPos, RefSteps))
+      continue;
+
+    RecipType = RecipType.substr(0, RefPos);
+    if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize))
+      return RefSteps;
+  }
+
+  return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+}
+
+/// Report the ReciprocalEstimate status that the function's
+/// "reciprocal-estimates" attribute gives to square root estimates of \p VT.
+int TargetLoweringBase::getRecipEstimateSqrtEnabled(EVT VT,
+                                                    MachineFunction &MF) const {
+  StringRef Override = getRecipEstimateForFunc(MF);
+  return getOpEnabled(/*IsSqrt=*/true, VT, Override);
+}
+
+/// Report the ReciprocalEstimate status that the function's
+/// "reciprocal-estimates" attribute gives to division estimates of \p VT.
+int TargetLoweringBase::getRecipEstimateDivEnabled(EVT VT,
+                                                   MachineFunction &MF) const {
+  StringRef Override = getRecipEstimateForFunc(MF);
+  return getOpEnabled(/*IsSqrt=*/false, VT, Override);
+}
+
+/// Report the refinement step count that the function's
+/// "reciprocal-estimates" attribute requests for square root estimates of
+/// \p VT, or ReciprocalEstimate::Unspecified if it requests none.
+int TargetLoweringBase::getSqrtRefinementSteps(EVT VT,
+                                               MachineFunction &MF) const {
+  StringRef Override = getRecipEstimateForFunc(MF);
+  return getOpRefinementSteps(/*IsSqrt=*/true, VT, Override);
+}
+
+/// Report the refinement step count that the function's
+/// "reciprocal-estimates" attribute requests for division estimates of
+/// \p VT, or ReciprocalEstimate::Unspecified if it requests none.
+int TargetLoweringBase::getDivRefinementSteps(EVT VT,
+                                              MachineFunction &MF) const {
+  StringRef Override = getRecipEstimateForFunc(MF);
+  return getOpRefinementSteps(/*IsSqrt=*/false, VT, Override);
+}
}
SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
+ SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps,
bool &UseOneConstNR) const {
- SelectionDAG &DAG = DCI.DAG;
EVT VT = Operand.getValueType();
if (VT == MVT::f32) {
}
SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const {
- SelectionDAG &DAG = DCI.DAG;
+ SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps) const {
EVT VT = Operand.getValueType();
if (VT == MVT::f32) {
bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override {
return true;
}
- SDValue getRsqrtEstimate(SDValue Operand,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
+ SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps,
bool &UseOneConstNR) const override;
- SDValue getRecipEstimate(SDValue Operand,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const override;
+ SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps) const override;
virtual SDNode *PostISelFolding(MachineSDNode *N,
SelectionDAG &DAG) const = 0;
TargetLoweringObjectFile.cpp
TargetMachine.cpp
TargetMachineC.cpp
- TargetRecip.cpp
TargetSubtargetInfo.cpp
ADDITIONAL_HEADER_DIRS
setTargetDAGCombine(ISD::FSQRT);
}
- // For the estimates, convergence is quadratic, so we essentially double the
- // number of digits correct after every iteration. For both FRE and FRSQRTE,
- // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
- // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
- unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3,
- RefinementSteps64 = RefinementSteps + 1;
-
- ReciprocalEstimates.set("sqrtf", true, RefinementSteps);
- ReciprocalEstimates.set("vec-sqrtf", true, RefinementSteps);
- ReciprocalEstimates.set("divf", true, RefinementSteps);
- ReciprocalEstimates.set("vec-divf", true, RefinementSteps);
-
- ReciprocalEstimates.set("sqrtd", true, RefinementSteps64);
- ReciprocalEstimates.set("vec-sqrtd", true, RefinementSteps64);
- ReciprocalEstimates.set("divd", true, RefinementSteps64);
- ReciprocalEstimates.set("vec-divd", true, RefinementSteps64);
-
// Darwin long double math library functions have $LDBL128 appended.
if (Subtarget.isDarwin()) {
setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
-static std::string getRecipOp(const char *Base, EVT VT) {
- std::string RecipOp(Base);
+static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
+ // For the estimates, convergence is quadratic, so we essentially double the
+ // number of digits correct after every iteration. For both FRE and FRSQRTE,
+ // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
+ // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
+ int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
- RecipOp += "d";
- else
- RecipOp += "f";
-
- if (VT.isVector())
- RecipOp = "vec-" + RecipOp;
-
- return RecipOp;
+ RefinementSteps++;
+ return RefinementSteps;
}
-SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
+SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG,
+ int Enabled, int &RefinementSteps,
bool &UseOneConstNR) const {
EVT VT = Operand.getValueType();
if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
(VT == MVT::v2f64 && Subtarget.hasVSX()) ||
(VT == MVT::v4f32 && Subtarget.hasQPX()) ||
(VT == MVT::v4f64 && Subtarget.hasQPX())) {
- TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
- std::string RecipOp = getRecipOp("sqrt", VT);
- if (!Recips.isEnabled(RecipOp))
- return SDValue();
+ if (RefinementSteps == ReciprocalEstimate::Unspecified)
+ RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
- RefinementSteps = Recips.getRefinementSteps(RecipOp);
UseOneConstNR = true;
- return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
+ return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
}
return SDValue();
}
-SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const {
+SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
+ int Enabled,
+ int &RefinementSteps) const {
EVT VT = Operand.getValueType();
if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
(VT == MVT::f64 && Subtarget.hasFRE()) ||
(VT == MVT::v2f64 && Subtarget.hasVSX()) ||
(VT == MVT::v4f32 && Subtarget.hasQPX()) ||
(VT == MVT::v4f64 && Subtarget.hasQPX())) {
- TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
- std::string RecipOp = getRecipOp("div", VT);
- if (!Recips.isEnabled(RecipOp))
- return SDValue();
-
- RefinementSteps = Recips.getRefinementSteps(RecipOp);
- return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
+ if (RefinementSteps == ReciprocalEstimate::Unspecified)
+ RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
+ return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
}
return SDValue();
}
SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
+ SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps,
bool &UseOneConstNR) const override;
- SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const override;
+ SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps) const override;
unsigned combineRepeatedFPDivisors() const override;
CCAssignFn *useFastISelCCs(unsigned Flag) const;
+++ /dev/null
-//===-------------------------- TargetRecip.cpp ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class is used to customize machine-specific reciprocal estimate code
-// generation in a target-independent way.
-// If a target does not support operations in this specification, then code
-// generation will default to using supported operations.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Target/TargetRecip.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/ErrorHandling.h"
-
-using namespace llvm;
-
-// These are the names of the individual reciprocal operations. These are
-// the key strings for queries and command-line inputs.
-// In addition, the command-line interface recognizes the global parameters
-// "all", "none", and "default".
-static const char *const RecipOps[] = {
- "divd",
- "divf",
- "vec-divd",
- "vec-divf",
- "sqrtd",
- "sqrtf",
- "vec-sqrtd",
- "vec-sqrtf",
-};
-
-/// All operations are disabled by default and refinement steps are set to zero.
-TargetRecip::TargetRecip() {
- unsigned NumStrings = llvm::array_lengthof(RecipOps);
- for (unsigned i = 0; i < NumStrings; ++i)
- RecipMap.insert(std::make_pair(RecipOps[i], RecipParams()));
-}
-
-static bool parseRefinementStep(StringRef In, size_t &Position,
- uint8_t &Value) {
- const char RefStepToken = ':';
- Position = In.find(RefStepToken);
- if (Position == StringRef::npos)
- return false;
-
- StringRef RefStepString = In.substr(Position + 1);
- // Allow exactly one numeric character for the additional refinement
- // step parameter.
- if (RefStepString.size() == 1) {
- char RefStepChar = RefStepString[0];
- if (RefStepChar >= '0' && RefStepChar <= '9') {
- Value = RefStepChar - '0';
- return true;
- }
- }
- report_fatal_error("Invalid refinement step for -recip.");
-}
-
-bool TargetRecip::parseGlobalParams(const std::string &Arg) {
- StringRef ArgSub = Arg;
-
- // Look for an optional setting of the number of refinement steps needed
- // for this type of reciprocal operation.
- size_t RefPos;
- uint8_t RefSteps;
- StringRef RefStepString;
- if (parseRefinementStep(ArgSub, RefPos, RefSteps)) {
- // Split the string for further processing.
- RefStepString = ArgSub.substr(RefPos + 1);
- ArgSub = ArgSub.substr(0, RefPos);
- }
- bool Enable;
- bool UseDefaults;
- if (ArgSub == "all") {
- UseDefaults = false;
- Enable = true;
- } else if (ArgSub == "none") {
- UseDefaults = false;
- Enable = false;
- } else if (ArgSub == "default") {
- UseDefaults = true;
- } else {
- // Any other string is invalid or an individual setting.
- return false;
- }
-
- // All enable values will be initialized to target defaults if 'default' was
- // specified.
- if (!UseDefaults)
- for (auto &KV : RecipMap)
- KV.second.Enabled = Enable;
-
- // Custom refinement count was specified with all, none, or default.
- if (!RefStepString.empty())
- for (auto &KV : RecipMap)
- KV.second.RefinementSteps = RefSteps;
-
- return true;
-}
-
-void TargetRecip::parseIndividualParams(const std::vector<std::string> &Args) {
- static const char DisabledPrefix = '!';
- unsigned NumArgs = Args.size();
-
- for (unsigned i = 0; i != NumArgs; ++i) {
- StringRef Val = Args[i];
-
- bool IsDisabled = Val[0] == DisabledPrefix;
- // Ignore the disablement token for string matching.
- if (IsDisabled)
- Val = Val.substr(1);
-
- size_t RefPos;
- uint8_t RefSteps;
- StringRef RefStepString;
- if (parseRefinementStep(Val, RefPos, RefSteps)) {
- // Split the string for further processing.
- RefStepString = Val.substr(RefPos + 1);
- Val = Val.substr(0, RefPos);
- }
-
- RecipIter Iter = RecipMap.find(Val);
- if (Iter == RecipMap.end()) {
- // Try again specifying float suffix.
- Iter = RecipMap.find(Val.str() + 'f');
- if (Iter == RecipMap.end()) {
- Iter = RecipMap.find(Val.str() + 'd');
- assert(Iter == RecipMap.end() && "Float entry missing from map");
- report_fatal_error("Invalid option for -recip.");
- }
- }
-
- // Mark the matched option as found. Do not allow duplicate specifiers.
- Iter->second.Enabled = !IsDisabled;
- if (!RefStepString.empty())
- Iter->second.RefinementSteps = RefSteps;
-
- // If the precision was not specified, the double entry is also initialized.
- if (Val.back() != 'f' && Val.back() != 'd') {
- RecipParams &Params = RecipMap[Val.str() + 'd'];
- Params.Enabled = !IsDisabled;
- if (!RefStepString.empty())
- Params.RefinementSteps = RefSteps;
- }
- }
-}
-
-void TargetRecip::set(StringRef &RecipString) {
- SmallVector<StringRef, 4> RecipStringVector;
- SplitString(RecipString, RecipStringVector, ",");
- std::vector<std::string> RecipVector;
- for (unsigned i = 0; i < RecipStringVector.size(); ++i)
- RecipVector.push_back(RecipStringVector[i].str());
-
- unsigned NumArgs = RecipVector.size();
-
- // Check if "all", "default", or "none" was specified.
- if (NumArgs == 1 && parseGlobalParams(RecipVector[0]))
- return;
-
- parseIndividualParams(RecipVector);
-}
-
-bool TargetRecip::isEnabled(StringRef Key) const {
- ConstRecipIter Iter = RecipMap.find(Key);
- assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
- return Iter->second.Enabled;
-}
-
-unsigned TargetRecip::getRefinementSteps(StringRef Key) const {
- ConstRecipIter Iter = RecipMap.find(Key);
- assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
- return Iter->second.RefinementSteps;
-}
-
-void TargetRecip::set(StringRef Key, bool Enable, unsigned RefSteps) {
- if (Key == "all") {
- for (auto &KV : RecipMap) {
- RecipParams &RP = KV.second;
- RP.Enabled = Enable;
- RP.RefinementSteps = RefSteps;
- }
- } else {
- RecipParams &RP = RecipMap[Key];
- RP.Enabled = Enable;
- RP.RefinementSteps = RefSteps;
- }
-}
-
-bool TargetRecip::operator==(const TargetRecip &Other) const {
- for (const auto &KV : RecipMap) {
- StringRef Op = KV.first;
- const RecipParams &RP = KV.second;
- const RecipParams &OtherRP = Other.RecipMap.find(Op)->second;
- if (RP.RefinementSteps != OtherRP.RefinementSteps)
- return false;
- if (RP.Enabled != OtherRP.Enabled)
- return false;
- }
- return true;
-}
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRecip.h"
#include "X86IntrinsicsInfo.h"
#include <bitset>
#include <numeric>
// X86-SSE is even stranger. It uses -1 or 0 for vector masks.
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
- // By default (and when -ffast-math is on), enable estimate codegen with 1
- // refinement step for floats (not doubles) except scalar division. Scalar
- // division estimates are disabled because they break too much real-world
- // code. These defaults are intended to match GCC behavior.
- ReciprocalEstimates.set("sqrtf", true, 1);
- ReciprocalEstimates.set("divf", false, 1);
- ReciprocalEstimates.set("vec-sqrtf", true, 1);
- ReciprocalEstimates.set("vec-divf", true, 1);
-
// For 64-bit, since we have so many registers, use the ILP scheduler.
// For 32-bit, use the register pressure specific scheduling.
// For Atom, always use ILP scheduling.
/// The minimum architected relative accuracy is 2^-12. We need one
/// Newton-Raphson step to have a good float result (24 bits of precision).
SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
+ SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps,
bool &UseOneConstNR) const {
EVT VT = Op.getValueType();
- const char *RecipOp;
// SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps.
// TODO: Add support for AVX512 (v16f32).
// instructions: convert to single, rsqrtss, convert back to double, refine
// (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA
// along with FMA, this could be a throughput win.
- if (VT == MVT::f32 && Subtarget.hasSSE1())
- RecipOp = "sqrtf";
- else if ((VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
- (VT == MVT::v8f32 && Subtarget.hasAVX()))
- RecipOp = "vec-sqrtf";
- else
- return SDValue();
+ if ((VT == MVT::f32 && Subtarget.hasSSE1()) ||
+ (VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
+ (VT == MVT::v8f32 && Subtarget.hasAVX())) {
+ if (RefinementSteps == ReciprocalEstimate::Unspecified)
+ RefinementSteps = 1;
- TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
- if (!Recips.isEnabled(RecipOp))
- return SDValue();
-
- RefinementSteps = Recips.getRefinementSteps(RecipOp);
- UseOneConstNR = false;
- return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
+ UseOneConstNR = false;
+ return DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
+ }
+ return SDValue();
}
/// The minimum architected relative accuracy is 2^-12. We need one
/// Newton-Raphson step to have a good float result (24 bits of precision).
-SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const {
+SDValue X86TargetLowering::getRecipEstimate(SDValue Op, SelectionDAG &DAG,
+ int Enabled,
+ int &RefinementSteps) const {
EVT VT = Op.getValueType();
- const char *RecipOp;
// SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps.
// TODO: Add support for AVX512 (v16f32).
// 15 instructions: convert to single, rcpss, convert back to double, refine
// (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA
// along with FMA, this could be a throughput win.
- if (VT == MVT::f32 && Subtarget.hasSSE1())
- RecipOp = "divf";
- else if ((VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
- (VT == MVT::v8f32 && Subtarget.hasAVX()))
- RecipOp = "vec-divf";
- else
- return SDValue();
- TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
- if (!Recips.isEnabled(RecipOp))
- return SDValue();
+ if ((VT == MVT::f32 && Subtarget.hasSSE1()) ||
+ (VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
+ (VT == MVT::v8f32 && Subtarget.hasAVX())) {
+ // Enable estimate codegen with 1 refinement step for vector division.
+ // Scalar division estimates are disabled by default (i.e. when 'Enabled'
+ // is Unspecified) because they break too much real-world code. These
+ // defaults are intended to match GCC behavior.
+ if (VT == MVT::f32 && Enabled == ReciprocalEstimate::Unspecified)
+ return SDValue();
- RefinementSteps = Recips.getRefinementSteps(RecipOp);
- return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
+ if (RefinementSteps == ReciprocalEstimate::Unspecified)
+ RefinementSteps = 1;
+
+ return DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
+ }
+ return SDValue();
}
/// If we have at least two divisions that use the same divisor, convert to
bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
/// Use rsqrt* to speed up sqrt calculations.
- SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
+ SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps,
bool &UseOneConstNR) const override;
/// Use rcp* to speed up fdiv calculations.
- SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const override;
+ SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps) const override;
/// Reassociate floating point divisions into multiply by reciprocal.
unsigned combineRepeatedFPDivisors() const override;