From bbcb21daf0bf6842d689da366f42feddabf67f35 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 18 Oct 2016 18:36:49 +0000 Subject: [PATCH] revert r284495: [Target] remove TargetRecip class There's something wrong with the StringRef usage while parsing the attribute string. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@284513 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/TargetLowering.h | 63 ++----- include/llvm/Target/TargetOptions.h | 1 + include/llvm/Target/TargetRecip.h | 71 ++++++++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 41 +---- lib/CodeGen/TargetLoweringBase.cpp | 202 ++-------------------- lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 10 +- lib/Target/AMDGPU/AMDGPUISelLowering.h | 10 +- lib/Target/CMakeLists.txt | 1 + lib/Target/PowerPC/PPCISelLowering.cpp | 65 +++++-- lib/Target/PowerPC/PPCISelLowering.h | 8 +- lib/Target/TargetRecip.cpp | 210 +++++++++++++++++++++++ lib/Target/X86/X86ISelLowering.cpp | 71 +++++--- lib/Target/X86/X86ISelLowering.h | 8 +- 13 files changed, 436 insertions(+), 325 deletions(-) create mode 100644 include/llvm/Target/TargetRecip.h create mode 100644 lib/Target/TargetRecip.cpp diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 62379130b84..235098e4213 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -61,6 +61,7 @@ namespace llvm { class MCSymbol; template class SmallVectorImpl; class DataLayout; + struct TargetRecip; class TargetRegisterClass; class TargetLibraryInfo; class TargetLoweringObjectFile; @@ -246,37 +247,6 @@ public: return false; } - /// Reciprocal estimate status values used by the functions below. - enum ReciprocalEstimate : int { - Unspecified = -1, - Disabled = 0, - Enabled = 1 - }; - - /// Return a ReciprocalEstimate enum value for a square root of the given type - /// based on the function's attributes. 
If the operation is not overridden by - /// the function's attributes, "Unspecified" is returned and target defaults - /// are expected to be used for instruction selection. - int getSqrtEnabled(EVT VT, MachineFunction &MF) const; - - /// Return a ReciprocalEstimate enum value for a division of the given type - /// based on the function's attributes. If the operation is not overridden by - /// the function's attributes, "Unspecified" is returned and target defaults - /// are expected to be used for instruction selection. - int getDivEnabled(EVT VT, MachineFunction &MF) const; - - /// Return the refinement step count for a square root of the given type based - /// on the function's attributes. If the operation is not overridden by - /// the function's attributes, "Unspecified" is returned and target defaults - /// are expected to be used for instruction selection. - int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const; - - /// Return the refinement step count for a division of the given type based - /// on the function's attributes. If the operation is not overridden by - /// the function's attributes, "Unspecified" is returned and target defaults - /// are expected to be used for instruction selection. - int getDivRefinementSteps(EVT VT, MachineFunction &MF) const; - /// Returns true if target has indicated at least one type should be bypassed. bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); } @@ -569,6 +539,12 @@ public: } } + /// Return the reciprocal estimate code generation preferences for this target + /// after potentially overriding settings using the function's attributes. + /// FIXME: Like all unsafe-math target settings, this should really be an + /// instruction-level attribute/metadata/FMF. + TargetRecip getTargetRecipForFunc(MachineFunction &MF) const; + /// Vector types are broken down into some number of legal first class types. 
/// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8 /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64 @@ -2182,6 +2158,7 @@ protected: /// sequence of memory operands that is recognized by PrologEpilogInserter. MachineBasicBlock *emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const; + TargetRecip ReciprocalEstimates; }; /// This class defines information used to lower LLVM code to legal SelectionDAG @@ -2994,35 +2971,31 @@ public: /// roots. /// Return a reciprocal square root estimate value for the input operand. - /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or - /// 'Enabled' as set by a potential default override attribute. - /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson - /// refinement iterations required to generate a sufficient (though not - /// necessarily IEEE-754 compliant) estimate is returned in that parameter. + /// The RefinementSteps output is the number of Newton-Raphson refinement + /// iterations required to generate a sufficient (though not necessarily + /// IEEE-754 compliant) estimate for the value type. /// The boolean UseOneConstNR output is used to select a Newton-Raphson /// algorithm implementation that uses one constant or two constants. /// A target may choose to implement its own refinement within this function. /// If that's true, then return '0' as the number of RefinementSteps to avoid /// any further refinement of the estimate. /// An empty SDValue return means no estimate sequence can be created. - virtual SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, - int Enabled, int &RefinementSteps, + virtual SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, + unsigned &RefinementSteps, bool &UseOneConstNR) const { return SDValue(); } /// Return a reciprocal estimate value for the input operand. 
- /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or - /// 'Enabled' as set by a potential default override attribute. - /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson - /// refinement iterations required to generate a sufficient (though not - /// necessarily IEEE-754 compliant) estimate is returned in that parameter. + /// The RefinementSteps output is the number of Newton-Raphson refinement + /// iterations required to generate a sufficient (though not necessarily + /// IEEE-754 compliant) estimate for the value type. /// A target may choose to implement its own refinement within this function. /// If that's true, then return '0' as the number of RefinementSteps to avoid /// any further refinement of the estimate. /// An empty SDValue return means no estimate sequence can be created. - virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, - int Enabled, int &RefinementSteps) const { + virtual SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, + unsigned &RefinementSteps) const { return SDValue(); } diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h index 9278ab7ca18..97c538be4dc 100644 --- a/include/llvm/Target/TargetOptions.h +++ b/include/llvm/Target/TargetOptions.h @@ -15,6 +15,7 @@ #ifndef LLVM_TARGET_TARGETOPTIONS_H #define LLVM_TARGET_TARGETOPTIONS_H +#include "llvm/Target/TargetRecip.h" #include "llvm/MC/MCTargetOptions.h" namespace llvm { diff --git a/include/llvm/Target/TargetRecip.h b/include/llvm/Target/TargetRecip.h new file mode 100644 index 00000000000..d3139dc0d03 --- /dev/null +++ b/include/llvm/Target/TargetRecip.h @@ -0,0 +1,71 @@ +//===--------------------- llvm/Target/TargetRecip.h ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This class is used to customize machine-specific reciprocal estimate code +// generation in a target-independent way. +// If a target does not support operations in this specification, then code +// generation will default to using supported operations. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TARGET_TARGETRECIP_H +#define LLVM_TARGET_TARGETRECIP_H + +#include +#include +#include +#include + +namespace llvm { + +class StringRef; + +struct TargetRecip { +public: + TargetRecip(); + + /// Parse a comma-separated string of reciprocal settings to set values in + /// this struct. + void set(StringRef &Args); + + /// Set enablement and refinement steps for a particular reciprocal operation. + /// Use "all" to give all operations the same values. + void set(StringRef Key, bool Enable, unsigned RefSteps); + + /// Return true if the reciprocal operation has been enabled. + bool isEnabled(StringRef Key) const; + + /// Return the number of iterations necessary to refine the + /// the result of a machine instruction for the given reciprocal operation. + unsigned getRefinementSteps(StringRef Key) const; + + bool operator==(const TargetRecip &Other) const; + +private: + // TODO: We should be able to use special values (enums) to simplify this into + // just an int, but we have to be careful because the user is allowed to + // specify "default" as a setting and just change the refinement step count. 
+ struct RecipParams { + bool Enabled; + int8_t RefinementSteps; + + RecipParams() : Enabled(false), RefinementSteps(0) {} + }; + + std::map RecipMap; + typedef std::map::iterator RecipIter; + typedef std::map::const_iterator ConstRecipIter; + + bool parseGlobalParams(const std::string &Arg); + void parseIndividualParams(const std::vector &Args); +}; + +} // end namespace llvm + +#endif // LLVM_TARGET_TARGETRECIP_H diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index dd7328ccab0..aa63889a7d7 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -14870,21 +14870,11 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) { if (Level >= AfterLegalizeDAG) return SDValue(); - // TODO: Handle half and/or extended types? - EVT VT = Op.getValueType(); - if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) - return SDValue(); + // Expose the DAG combiner to the target combiner implementations. + TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); - // If estimates are explicitly disabled for this function, we're done. - MachineFunction &MF = DAG.getMachineFunction(); - int Enabled = TLI.getDivEnabled(VT, MF); - if (Enabled == TLI.ReciprocalEstimate::Disabled) - return SDValue(); - - // Estimates may be explicitly enabled for this type with a custom number of - // refinement steps. 
- int Iterations = TLI.getDivRefinementSteps(VT, MF); - if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) { + unsigned Iterations = 0; + if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) { if (Iterations) { // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) // For the reciprocal, we need to find the zero of the function: @@ -14899,7 +14889,7 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) { AddToWorklist(Est.getNode()); // Newton iterations: Est = Est + Est (1 - Arg * Est) - for (int i = 0; i < Iterations; ++i) { + for (unsigned i = 0; i < Iterations; ++i) { SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags); AddToWorklist(NewEst.getNode()); @@ -15021,24 +15011,11 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, if (Level >= AfterLegalizeDAG) return SDValue(); - // TODO: Handle half and/or extended types? - EVT VT = Op.getValueType(); - if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64) - return SDValue(); - - // If estimates are explicitly disabled for this function, we're done. - MachineFunction &MF = DAG.getMachineFunction(); - int Enabled = TLI.getSqrtEnabled(VT, MF); - if (Enabled == TLI.ReciprocalEstimate::Disabled) - return SDValue(); - - // Estimates may be explicitly enabled for this type with a custom number of - // refinement steps. - int Iterations = TLI.getSqrtRefinementSteps(VT, MF); - + // Expose the DAG combiner to the target combiner implementations. 
+ TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this); + unsigned Iterations = 0; bool UseOneConstNR = false; - if (SDValue Est = - TLI.getRsqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR)) { + if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) { AddToWorklist(Est.getNode()); if (Iterations) { Est = UseOneConstNR diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index a62c2a4e2f6..f62e74ad398 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -14,7 +14,6 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -838,6 +837,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) { InitLibcallNames(LibcallRoutineNames, TM.getTargetTriple()); InitCmpLibcallCCs(CmpLibcallCCs); InitLibcallCallingConvs(LibcallCallingConvs); + ReciprocalEstimates.set("all", false, 0); } void TargetLoweringBase::initActions() { @@ -1485,6 +1485,22 @@ MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const { return MVT::i32; // return the default value } +TargetRecip +TargetLoweringBase::getTargetRecipForFunc(MachineFunction &MF) const { + const Function *F = MF.getFunction(); + StringRef RecipAttrName = "reciprocal-estimates"; + if (!F->hasFnAttribute(RecipAttrName)) + return ReciprocalEstimates; + + // Make a copy of the target's default reciprocal codegen settings. + TargetRecip Recips = ReciprocalEstimates; + + // Override any settings that are customized for this function. + StringRef RecipString = F->getFnAttribute(RecipAttrName).getValueAsString(); + Recips.set(RecipString); + return Recips; +} + /// getVectorTypeBreakdown - Vector types are broken down into some number of /// legal first class types. 
For example, MVT::v8f32 maps to 2 MVT::v4f32 /// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. @@ -1875,187 +1891,3 @@ unsigned TargetLoweringBase::getMaximumJumpTableSize() const { void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) { MaximumJumpTableSize = Val; } - -//===----------------------------------------------------------------------===// -// Reciprocal Estimates -//===----------------------------------------------------------------------===// - -/// Get the reciprocal estimate attribute string for a function that will -/// override the target defaults. -static StringRef getRecipEstimateForFunc(MachineFunction &MF) { - const Function *F = MF.getFunction(); - StringRef RecipAttrName = "reciprocal-estimates"; - if (!F->hasFnAttribute(RecipAttrName)) - return StringRef(); - - return F->getFnAttribute(RecipAttrName).getValueAsString(); -} - -/// Construct a string for the given reciprocal operation of the given type. -/// This string should match the corresponding option to the front-end's -/// "-mrecip" flag assuming those strings have been passed through in an -/// attribute string. For example, "vec-divf" for a division of a vXf32. -static std::string getReciprocalOpName(bool IsSqrt, EVT VT) { - std::string Name = VT.isVector() ? "vec-" : ""; - - Name += IsSqrt ? "sqrt" : "div"; - - // TODO: Handle "half" or other float types? - if (VT.getScalarType() == MVT::f64) { - Name += "d"; - } else { - assert(VT.getScalarType() == MVT::f32 && - "Unexpected FP type for reciprocal estimate"); - Name += "f"; - } - - return Name; -} - -/// Return the character position and value (a single numeric character) of a -/// customized refinement operation in the input string if it exists. Return -/// false if there is no customized refinement step count. 
-static bool parseRefinementStep(StringRef In, size_t &Position, - uint8_t &Value) { - const char RefStepToken = ':'; - Position = In.find(RefStepToken); - if (Position == StringRef::npos) - return false; - - StringRef RefStepString = In.substr(Position + 1); - // Allow exactly one numeric character for the additional refinement - // step parameter. - if (RefStepString.size() == 1) { - char RefStepChar = RefStepString[0]; - if (RefStepChar >= '0' && RefStepChar <= '9') { - Value = RefStepChar - '0'; - return true; - } - } - report_fatal_error("Invalid refinement step for -recip."); -} - -/// For the input attribute string, return one of the ReciprocalEstimate enum -/// status values (enabled, disabled, or not specified) for this operation on -/// the specified data type. -static int getOpEnabled(bool IsSqrt, EVT VT, StringRef Override) { - if (Override.empty()) - return TargetLoweringBase::ReciprocalEstimate::Unspecified; - - SmallVector OverrideVector; - SplitString(Override, OverrideVector, ","); - unsigned NumArgs = OverrideVector.size(); - - // Check if "all", "none", or "default" was specified. - if (NumArgs == 1) { - // Look for an optional setting of the number of refinement steps needed - // for this type of reciprocal operation. - size_t RefPos; - uint8_t RefSteps; - if (parseRefinementStep(Override, RefPos, RefSteps)) { - // Split the string for further processing. - Override = Override.substr(0, RefPos); - } - - // All reciprocal types are enabled. - if (Override == "all") - return TargetLoweringBase::ReciprocalEstimate::Enabled; - - // All reciprocal types are disabled. - if (Override == "none") - return TargetLoweringBase::ReciprocalEstimate::Disabled; - - // Target defaults for enablement are used. - if (Override == "default") - return TargetLoweringBase::ReciprocalEstimate::Unspecified; - } - - // The attribute string may omit the size suffix ('f'/'d'). 
- StringRef VTName = getReciprocalOpName(IsSqrt, VT); - StringRef VTNameNoSize = VTName.drop_back(); - static const char DisabledPrefix = '!'; - - for (StringRef RecipType : OverrideVector) { - size_t RefPos; - uint8_t RefSteps; - if (parseRefinementStep(RecipType, RefPos, RefSteps)) - RecipType = RecipType.substr(0, RefPos); - - // Ignore the disablement token for string matching. - bool IsDisabled = RecipType[0] == DisabledPrefix; - if (IsDisabled) - RecipType = RecipType.substr(1); - - if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize)) - return IsDisabled ? TargetLoweringBase::ReciprocalEstimate::Disabled - : TargetLoweringBase::ReciprocalEstimate::Enabled; - } - - return TargetLoweringBase::ReciprocalEstimate::Unspecified; -} - -/// For the input attribute string, return the customized refinement step count -/// for this operation on the specified data type. If the step count does not -/// exist, return the ReciprocalEstimate enum value for unspecified. -static int getOpRefinementSteps(bool IsSqrt, EVT VT, StringRef Override) { - if (Override.empty()) - return TargetLoweringBase::ReciprocalEstimate::Unspecified; - - SmallVector OverrideVector; - SplitString(Override, OverrideVector, ","); - unsigned NumArgs = OverrideVector.size(); - - // Check if "all", "default", or "none" was specified. - if (NumArgs == 1) { - // Look for an optional setting of the number of refinement steps needed - // for this type of reciprocal operation. - size_t RefPos; - uint8_t RefSteps; - if (!parseRefinementStep(Override, RefPos, RefSteps)) - return TargetLoweringBase::ReciprocalEstimate::Unspecified; - - // Split the string for further processing. - Override = Override.substr(0, RefPos); - assert(Override != "none" && - "Disabled reciprocals, but specifed refinement steps?"); - - // If this is a general override, return the specified number of steps. 
- if (Override == "all" || Override == "default") - return RefSteps; - } - - // The attribute string may omit the size suffix ('f'/'d'). - StringRef VTName = getReciprocalOpName(IsSqrt, VT); - StringRef VTNameNoSize = VTName.drop_back(); - - for (StringRef RecipType : OverrideVector) { - size_t RefPos; - uint8_t RefSteps; - if (!parseRefinementStep(RecipType, RefPos, RefSteps)) - continue; - - RecipType = RecipType.substr(0, RefPos); - if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize)) - return RefSteps; - } - - return TargetLoweringBase::ReciprocalEstimate::Unspecified; -} - -int TargetLoweringBase::getSqrtEnabled(EVT VT, MachineFunction &MF) const { - return getOpEnabled(true, VT, getRecipEstimateForFunc(MF)); -} - -int TargetLoweringBase::getDivEnabled(EVT VT, MachineFunction &MF) const { - return getOpEnabled(false, VT, getRecipEstimateForFunc(MF)); -} - -int TargetLoweringBase::getSqrtRefinementSteps(EVT VT, - MachineFunction &MF) const { - return getOpRefinementSteps(true, VT, getRecipEstimateForFunc(MF)); -} - -int TargetLoweringBase::getDivRefinementSteps(EVT VT, - MachineFunction &MF) const { - return getOpRefinementSteps(false, VT, getRecipEstimateForFunc(MF)); -} diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index ac6c7e33fb3..03833609a77 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -2859,9 +2859,10 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { } SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand, - SelectionDAG &DAG, int Enabled, - int &RefinementSteps, + DAGCombinerInfo &DCI, + unsigned &RefinementSteps, bool &UseOneConstNR) const { + SelectionDAG &DAG = DCI.DAG; EVT VT = Operand.getValueType(); if (VT == MVT::f32) { @@ -2876,8 +2877,9 @@ SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand, } SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand, - SelectionDAG &DAG, 
int Enabled, - int &RefinementSteps) const { + DAGCombinerInfo &DCI, + unsigned &RefinementSteps) const { + SelectionDAG &DAG = DCI.DAG; EVT VT = Operand.getValueType(); if (VT == MVT::f32) { diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h index 4cc1a74d18b..fc042b28180 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -164,11 +164,13 @@ public: bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override { return true; } - SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, - int &RefinementSteps, + SDValue getRsqrtEstimate(SDValue Operand, + DAGCombinerInfo &DCI, + unsigned &RefinementSteps, bool &UseOneConstNR) const override; - SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, - int &RefinementSteps) const override; + SDValue getRecipEstimate(SDValue Operand, + DAGCombinerInfo &DCI, + unsigned &RefinementSteps) const override; virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const = 0; diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt index 1805437b12f..e6d0199952f 100644 --- a/lib/Target/CMakeLists.txt +++ b/lib/Target/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_library(LLVMTarget TargetLoweringObjectFile.cpp TargetMachine.cpp TargetMachineC.cpp + TargetRecip.cpp TargetSubtargetInfo.cpp ADDITIONAL_HEADER_DIRS diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 55e299cdc9d..05e823d7f16 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -901,6 +901,23 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setTargetDAGCombine(ISD::FSQRT); } + // For the estimates, convergence is quadratic, so we essentially double the + // number of digits correct after every iteration. For both FRE and FRSQRTE, + // the minimum architected relative accuracy is 2^-5. 
When hasRecipPrec(), + // this is 2^-14. IEEE float has 23 digits and double has 52 digits. + unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3, + RefinementSteps64 = RefinementSteps + 1; + + ReciprocalEstimates.set("sqrtf", true, RefinementSteps); + ReciprocalEstimates.set("vec-sqrtf", true, RefinementSteps); + ReciprocalEstimates.set("divf", true, RefinementSteps); + ReciprocalEstimates.set("vec-divf", true, RefinementSteps); + + ReciprocalEstimates.set("sqrtd", true, RefinementSteps64); + ReciprocalEstimates.set("vec-sqrtd", true, RefinementSteps64); + ReciprocalEstimates.set("divd", true, RefinementSteps64); + ReciprocalEstimates.set("vec-divd", true, RefinementSteps64); + // Darwin long double math library functions have $LDBL128 appended. if (Subtarget.isDarwin()) { setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); @@ -9622,19 +9639,22 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, // Target Optimization Hooks //===----------------------------------------------------------------------===// -static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) { - // For the estimates, convergence is quadratic, so we essentially double the - // number of digits correct after every iteration. For both FRE and FRSQRTE, - // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(), - // this is 2^-14. IEEE float has 23 digits and double has 52 digits. - int RefinementSteps = Subtarget.hasRecipPrec() ? 
1 : 3; +static std::string getRecipOp(const char *Base, EVT VT) { + std::string RecipOp(Base); if (VT.getScalarType() == MVT::f64) - RefinementSteps++; - return RefinementSteps; + RecipOp += "d"; + else + RecipOp += "f"; + + if (VT.isVector()) + RecipOp = "vec-" + RecipOp; + + return RecipOp; } -SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, - int Enabled, int &RefinementSteps, +SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, + DAGCombinerInfo &DCI, + unsigned &RefinementSteps, bool &UseOneConstNR) const { EVT VT = Operand.getValueType(); if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || @@ -9643,18 +9663,21 @@ SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, (VT == MVT::v2f64 && Subtarget.hasVSX()) || (VT == MVT::v4f32 && Subtarget.hasQPX()) || (VT == MVT::v4f64 && Subtarget.hasQPX())) { - if (RefinementSteps == ReciprocalEstimate::Unspecified) - RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); + TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction()); + std::string RecipOp = getRecipOp("sqrt", VT); + if (!Recips.isEnabled(RecipOp)) + return SDValue(); + RefinementSteps = Recips.getRefinementSteps(RecipOp); UseOneConstNR = true; - return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); + return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand); } return SDValue(); } -SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG, - int Enabled, - int &RefinementSteps) const { +SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, + DAGCombinerInfo &DCI, + unsigned &RefinementSteps) const { EVT VT = Operand.getValueType(); if ((VT == MVT::f32 && Subtarget.hasFRES()) || (VT == MVT::f64 && Subtarget.hasFRE()) || @@ -9662,9 +9685,13 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG, (VT == MVT::v2f64 && Subtarget.hasVSX()) || (VT == MVT::v4f32 && Subtarget.hasQPX()) || (VT == MVT::v4f64 
&& Subtarget.hasQPX())) { - if (RefinementSteps == ReciprocalEstimate::Unspecified) - RefinementSteps = getEstimateRefinementSteps(VT, Subtarget); - return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); + TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction()); + std::string RecipOp = getRecipOp("div", VT); + if (!Recips.isEnabled(RecipOp)) + return SDValue(); + + RefinementSteps = Recips.getRefinementSteps(RecipOp); + return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand); } return SDValue(); } diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 3d4ec27c5a6..bdd658a8413 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -967,11 +967,11 @@ namespace llvm { SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, - int &RefinementSteps, + SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI, + unsigned &RefinementSteps, bool &UseOneConstNR) const override; - SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled, - int &RefinementSteps) const override; + SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI, + unsigned &RefinementSteps) const override; unsigned combineRepeatedFPDivisors() const override; CCAssignFn *useFastISelCCs(unsigned Flag) const; diff --git a/lib/Target/TargetRecip.cpp b/lib/Target/TargetRecip.cpp new file mode 100644 index 00000000000..938ed9f3240 --- /dev/null +++ b/lib/Target/TargetRecip.cpp @@ -0,0 +1,210 @@ +//===-------------------------- TargetRecip.cpp ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// This class is used to customize machine-specific reciprocal estimate code
+// generation in a target-independent way.
+// If a target does not support operations in this specification, then code
+// generation will default to using supported operations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetRecip.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+// These are the names of the individual reciprocal operations. These are
+// the key strings for queries and command-line inputs.
+// In addition, the command-line interface recognizes the global parameters
+// "all", "none", and "default".
+static const char *const RecipOps[] = {
+  "divd",
+  "divf",
+  "vec-divd",
+  "vec-divf",
+  "sqrtd",
+  "sqrtf",
+  "vec-sqrtd",
+  "vec-sqrtf",
+};
+
+/// All operations are disabled by default and refinement steps are set to zero.
+TargetRecip::TargetRecip() {
+  unsigned NumStrings = llvm::array_lengthof(RecipOps);
+  for (unsigned i = 0; i < NumStrings; ++i)
+    RecipMap.insert(std::make_pair(RecipOps[i], RecipParams()));
+}
+
+static bool parseRefinementStep(StringRef In, size_t &Position,
+                                uint8_t &Value) {
+  const char RefStepToken = ':';
+  Position = In.find(RefStepToken);
+  if (Position == StringRef::npos)
+    return false;
+
+  StringRef RefStepString = In.substr(Position + 1);
+  // Allow exactly one numeric character for the additional refinement
+  // step parameter.
+  if (RefStepString.size() == 1) {
+    char RefStepChar = RefStepString[0];
+    if (RefStepChar >= '0' && RefStepChar <= '9') {
+      Value = RefStepChar - '0';
+      return true;
+    }
+  }
+  report_fatal_error("Invalid refinement step for -recip.");
+}
+
+bool TargetRecip::parseGlobalParams(const std::string &Arg) {
+  StringRef ArgSub = Arg;
+
+  // Look for an optional setting of the number of refinement steps needed
+  // for this type of reciprocal operation.
+  size_t RefPos;
+  uint8_t RefSteps;
+  StringRef RefStepString;
+  if (parseRefinementStep(ArgSub, RefPos, RefSteps)) {
+    // Split the string for further processing.
+    RefStepString = ArgSub.substr(RefPos + 1);
+    ArgSub = ArgSub.substr(0, RefPos);
+  }
+  bool Enable;
+  bool UseDefaults;
+  if (ArgSub == "all") {
+    UseDefaults = false;
+    Enable = true;
+  } else if (ArgSub == "none") {
+    UseDefaults = false;
+    Enable = false;
+  } else if (ArgSub == "default") {
+    UseDefaults = true;
+  } else {
+    // Any other string is invalid or an individual setting.
+    return false;
+  }
+
+  // All enable values will be initialized to target defaults if 'default' was
+  // specified.
+  if (!UseDefaults)
+    for (auto &KV : RecipMap)
+      KV.second.Enabled = Enable;
+
+  // Custom refinement count was specified with all, none, or default.
+  if (!RefStepString.empty())
+    for (auto &KV : RecipMap)
+      KV.second.RefinementSteps = RefSteps;
+
+  return true;
+}
+
+void TargetRecip::parseIndividualParams(const std::vector<std::string> &Args) {
+  static const char DisabledPrefix = '!';
+  unsigned NumArgs = Args.size();
+
+  for (unsigned i = 0; i != NumArgs; ++i) {
+    StringRef Val = Args[i];
+
+    bool IsDisabled = Val[0] == DisabledPrefix;
+    // Ignore the disablement token for string matching.
+    if (IsDisabled)
+      Val = Val.substr(1);
+
+    size_t RefPos;
+    uint8_t RefSteps;
+    StringRef RefStepString;
+    if (parseRefinementStep(Val, RefPos, RefSteps)) {
+      // Split the string for further processing.
+      RefStepString = Val.substr(RefPos + 1);
+      Val = Val.substr(0, RefPos);
+    }
+
+    RecipIter Iter = RecipMap.find(Val);
+    if (Iter == RecipMap.end()) {
+      // Try again specifying float suffix.
+      Iter = RecipMap.find(Val.str() + 'f');
+      if (Iter == RecipMap.end()) {
+        Iter = RecipMap.find(Val.str() + 'd');
+        assert(Iter == RecipMap.end() && "Float entry missing from map");
+        report_fatal_error("Invalid option for -recip.");
+      }
+    }
+
+    // Mark the matched option as found. Do not allow duplicate specifiers.
+    Iter->second.Enabled = !IsDisabled;
+    if (!RefStepString.empty())
+      Iter->second.RefinementSteps = RefSteps;
+
+    // If the precision was not specified, the double entry is also initialized.
+    if (Val.back() != 'f' && Val.back() != 'd') {
+      RecipParams &Params = RecipMap[Val.str() + 'd'];
+      Params.Enabled = !IsDisabled;
+      if (!RefStepString.empty())
+        Params.RefinementSteps = RefSteps;
+    }
+  }
+}
+
+void TargetRecip::set(StringRef &RecipString) {
+  SmallVector<StringRef, 4> RecipStringVector;
+  SplitString(RecipString, RecipStringVector, ",");
+  std::vector<std::string> RecipVector;
+  for (unsigned i = 0; i < RecipStringVector.size(); ++i)
+    RecipVector.push_back(RecipStringVector[i].str());
+
+  unsigned NumArgs = RecipVector.size();
+
+  // Check if "all", "default", or "none" was specified.
+  if (NumArgs == 1 && parseGlobalParams(RecipVector[0]))
+    return;
+
+  parseIndividualParams(RecipVector);
+}
+
+bool TargetRecip::isEnabled(StringRef Key) const {
+  ConstRecipIter Iter = RecipMap.find(Key);
+  assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
+  return Iter->second.Enabled;
+}
+
+unsigned TargetRecip::getRefinementSteps(StringRef Key) const {
+  ConstRecipIter Iter = RecipMap.find(Key);
+  assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
+  return Iter->second.RefinementSteps;
+}
+
+void TargetRecip::set(StringRef Key, bool Enable, unsigned RefSteps) {
+  if (Key == "all") {
+    for (auto &KV : RecipMap) {
+      RecipParams &RP = KV.second;
+      RP.Enabled = Enable;
+      RP.RefinementSteps = RefSteps;
+    }
+  } else {
+    RecipParams &RP = RecipMap[Key];
+    RP.Enabled = Enable;
+    RP.RefinementSteps = RefSteps;
+  }
+}
+
+bool TargetRecip::operator==(const TargetRecip &Other) const {
+  for (const auto &KV : RecipMap) {
+    StringRef Op = KV.first;
+    const RecipParams &RP = KV.second;
+    const RecipParams &OtherRP = Other.RecipMap.find(Op)->second;
+    if (RP.RefinementSteps != OtherRP.RefinementSteps)
+      return false;
+    if (RP.Enabled != OtherRP.Enabled)
+      return false;
+  }
+  return true;
+}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index b1f26109154..c1b6a2204cd 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -53,6 +53,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRecip.h"
 #include "X86IntrinsicsInfo.h"
 #include <bitset>
 #include <numeric>
@@ -84,6 +85,15 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
   // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
 
+  // By default (and when -ffast-math is on), enable estimate codegen with 1
+  // refinement step for floats (not doubles) except scalar division. Scalar
+  // division estimates are disabled because they break too much real-world
+  // code. These defaults are intended to match GCC behavior.
+  ReciprocalEstimates.set("sqrtf", true, 1);
+  ReciprocalEstimates.set("divf", false, 1);
+  ReciprocalEstimates.set("vec-sqrtf", true, 1);
+  ReciprocalEstimates.set("vec-divf", true, 1);
+
   // For 64-bit, since we have so many registers, use the ILP scheduler.
   // For 32-bit, use the register pressure specific scheduling.
   // For Atom, always use ILP scheduling.
@@ -15241,10 +15251,11 @@ bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const {
 /// The minimum architected relative accuracy is 2^-12. We need one
 /// Newton-Raphson step to have a good float result (24 bits of precision).
 SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op,
-                                            SelectionDAG &DAG, int Enabled,
-                                            int &RefinementSteps,
+                                            DAGCombinerInfo &DCI,
+                                            unsigned &RefinementSteps,
                                             bool &UseOneConstNR) const {
   EVT VT = Op.getValueType();
+  const char *RecipOp;
 
   // SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps.
   // TODO: Add support for AVX512 (v16f32).
@@ -15253,24 +15264,30 @@ SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op,
   // instructions: convert to single, rsqrtss, convert back to double, refine
   // (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA
   // along with FMA, this could be a throughput win.
-  if ((VT == MVT::f32 && Subtarget.hasSSE1()) ||
-      (VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
-      (VT == MVT::v8f32 && Subtarget.hasAVX())) {
-    if (RefinementSteps == ReciprocalEstimate::Unspecified)
-      RefinementSteps = 1;
+  if (VT == MVT::f32 && Subtarget.hasSSE1())
+    RecipOp = "sqrtf";
+  else if ((VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
+           (VT == MVT::v8f32 && Subtarget.hasAVX()))
+    RecipOp = "vec-sqrtf";
+  else
+    return SDValue();
 
-    UseOneConstNR = false;
-    return DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
-  }
-  return SDValue();
+  TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
+  if (!Recips.isEnabled(RecipOp))
+    return SDValue();
+
+  RefinementSteps = Recips.getRefinementSteps(RecipOp);
+  UseOneConstNR = false;
+  return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
 }
 
 /// The minimum architected relative accuracy is 2^-12. We need one
 /// Newton-Raphson step to have a good float result (24 bits of precision).
-SDValue X86TargetLowering::getRecipEstimate(SDValue Op, SelectionDAG &DAG,
-                                            int Enabled,
-                                            int &RefinementSteps) const {
+SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
+                                            DAGCombinerInfo &DCI,
+                                            unsigned &RefinementSteps) const {
   EVT VT = Op.getValueType();
+  const char *RecipOp;
 
   // SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps.
   // TODO: Add support for AVX512 (v16f32).
@@ -15279,22 +15296,20 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op, SelectionDAG &DAG,
   // 15 instructions: convert to single, rcpss, convert back to double, refine
   // (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA
   // along with FMA, this could be a throughput win.
+  if (VT == MVT::f32 && Subtarget.hasSSE1())
+    RecipOp = "divf";
+  else if ((VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
+           (VT == MVT::v8f32 && Subtarget.hasAVX()))
+    RecipOp = "vec-divf";
+  else
+    return SDValue();
 
-  if ((VT == MVT::f32 && Subtarget.hasSSE1()) ||
-      (VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
-      (VT == MVT::v8f32 && Subtarget.hasAVX())) {
-    // Enable estimate codegen with 1 refinement step for vector division.
-    // Scalar division estimates are disabled because they break too much
-    // real-world code. These defaults are intended to match GCC behavior.
-    if (VT == MVT::f32 && Enabled == ReciprocalEstimate::Unspecified)
-      return SDValue();
-
-    if (RefinementSteps == ReciprocalEstimate::Unspecified)
-      RefinementSteps = 1;
+  TargetRecip Recips = getTargetRecipForFunc(DCI.DAG.getMachineFunction());
+  if (!Recips.isEnabled(RecipOp))
+    return SDValue();
 
-    return DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
-  }
-  return SDValue();
+  RefinementSteps = Recips.getRefinementSteps(RecipOp);
+  return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
 }
 
 /// If we have at least two divisions that use the same divisor, convert to
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 83c0d010c24..09d6ee47393 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -1266,13 +1266,13 @@ namespace llvm {
     bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
 
     /// Use rsqrt* to speed up sqrt calculations.
-    SDValue getRsqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
-                             int &RefinementSteps,
+    SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+                             unsigned &RefinementSteps,
                              bool &UseOneConstNR) const override;
 
     /// Use rcp* to speed up fdiv calculations.
-    SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
-                             int &RefinementSteps) const override;
+    SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
+                             unsigned &RefinementSteps) const override;
 
     /// Reassociate floating point divisions into multiply by reciprocal.
     unsigned combineRepeatedFPDivisors() const override;
-- 
2.50.1