From: Eugene Zelenko Date: Fri, 1 Sep 2017 21:37:29 +0000 (+0000) Subject: [Analysis, Transforms] Fix some Clang-tidy modernize and Include What You Use warning... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=cecd8f18e21715f6a4aaa374a38100943a3f811c;p=llvm [Analysis, Transforms] Fix some Clang-tidy modernize and Include What You Use warnings; other minor fixes (NFC). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@312383 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/include/llvm/Analysis/AssumptionCache.h b/include/llvm/Analysis/AssumptionCache.h index 58d72afdc1b..c965e62a021 100644 --- a/include/llvm/Analysis/AssumptionCache.h +++ b/include/llvm/Analysis/AssumptionCache.h @@ -1,4 +1,4 @@ -//===- llvm/Analysis/AssumptionCache.h - Track @llvm.assume ---*- C++ -*-===// +//===- llvm/Analysis/AssumptionCache.h - Track @llvm.assume -----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -18,9 +18,8 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" @@ -28,6 +27,11 @@ namespace llvm { +class CallInst; +class Function; +class raw_ostream; +class Value; + /// \brief A cache of @llvm.assume calls within a function. /// /// This cache provides fast lookup of assumptions within a function by caching @@ -47,6 +51,7 @@ class AssumptionCache { class AffectedValueCallbackVH final : public CallbackVH { AssumptionCache *AC; + void deleted() override; void allUsesReplacedWith(Value *) override; @@ -76,7 +81,7 @@ class AssumptionCache { /// /// We want to be as lazy about this as possible, and so we scan the function /// at the last moment. - bool Scanned; + bool Scanned = false; /// \brief Scan the function for assumptions and add them to the cache. void scanFunction(); @@ -84,7 +89,7 @@ class AssumptionCache { public: /// \brief Construct an AssumptionCache from a function by scanning all of /// its instructions. - AssumptionCache(Function &F) : F(F), Scanned(false) {} + AssumptionCache(Function &F) : F(F) {} /// This cache is designed to be self-updating and so it should never be /// invalidated. @@ -145,10 +150,11 @@ public: /// assumption caches for a given function. class AssumptionAnalysis : public AnalysisInfoMixin { friend AnalysisInfoMixin; + static AnalysisKey Key; public: - typedef AssumptionCache Result; + using Result = AssumptionCache; AssumptionCache run(Function &F, FunctionAnalysisManager &) { return AssumptionCache(F); @@ -161,6 +167,7 @@ class AssumptionPrinterPass : public PassInfoMixin { public: explicit AssumptionPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); }; @@ -177,10 +184,11 @@ class AssumptionCacheTracker : public ImmutablePass { /// delete our cache of intrinsics for a function when it is deleted. class FunctionCallbackVH final : public CallbackVH { AssumptionCacheTracker *ACT; + void deleted() override; public: - typedef DenseMapInfo DMI; + using DMI = DenseMapInfo; FunctionCallbackVH(Value *V, AssumptionCacheTracker *ACT = nullptr) : CallbackVH(V), ACT(ACT) {} @@ -188,8 +196,10 @@ class AssumptionCacheTracker : public ImmutablePass { friend FunctionCallbackVH; - typedef DenseMap, - FunctionCallbackVH::DMI> FunctionCallsMap; + using FunctionCallsMap = + DenseMap, + FunctionCallbackVH::DMI>; + FunctionCallsMap AssumptionCaches; public: @@ -208,6 +218,7 @@ public: } void verifyAnalysis() const override; + bool doFinalization(Module &) override { verifyAnalysis(); return false; @@ -218,4 +229,4 @@ public: } // end namespace llvm -#endif +#endif // LLVM_ANALYSIS_ASSUMPTIONCACHE_H diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index b42ce46e840..2705eb3f3c4 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -15,32 +15,32 @@ #ifndef LLVM_ANALYSIS_VALUETRACKING_H #define LLVM_ANALYSIS_VALUETRACKING_H +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/Instruction.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/Support/DataTypes.h" +#include "llvm/IR/Intrinsics.h" +#include +#include namespace llvm { -template class ArrayRef; - class APInt; - class AddOperator; - class AssumptionCache; - class DataLayout; - class DominatorTree; - class GEPOperator; - class Instruction; - struct KnownBits; - class Loop; - class LoopInfo; - class OptimizationRemarkEmitter; - class MDNode; - class StringRef; - class TargetLibraryInfo; - class Value; - - namespace Intrinsic { - enum ID : unsigned; - } + +class AddOperator; +class APInt; +class AssumptionCache; +class DataLayout; +class DominatorTree; +class GEPOperator; +class IntrinsicInst; +struct KnownBits; +class Loop; +class LoopInfo; +class MDNode; +class OptimizationRemarkEmitter; +class StringRef; +class TargetLibraryInfo; +class Value; /// Determine which bits of V are known to be either zero or one and return /// them in the KnownZero/KnownOne bit sets. @@ -56,17 +56,20 @@ template class ArrayRef; const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr, OptimizationRemarkEmitter *ORE = nullptr); + /// Returns the known bits rather than passing by reference. KnownBits computeKnownBits(const Value *V, const DataLayout &DL, unsigned Depth = 0, AssumptionCache *AC = nullptr, const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr, OptimizationRemarkEmitter *ORE = nullptr); + /// Compute known bits from the range metadata. /// \p KnownZero the set of bits that are known to be zero /// \p KnownOne the set of bits that are known to be one void computeKnownBitsFromRangeMetadata(const MDNode &Ranges, KnownBits &Known); + /// Return true if LHS and RHS have no common bits set. bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS, const DataLayout &DL, @@ -180,7 +183,6 @@ template class ArrayRef; /// -0 --> true /// x > +0 --> true /// x < -0 --> false - /// bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI); /// Return true if we can prove that the specified FP value's sign bit is 0. @@ -190,7 +192,6 @@ template class ArrayRef; /// -0 --> false /// x > +0 --> true /// x < -0 --> false - /// bool SignBitMustBeZero(const Value *V, const TargetLibraryInfo *TLI); /// If the specified value can be set by repeating the same byte in memory, @@ -231,8 +232,10 @@ template class ArrayRef; /// ConstantDataArray pointer. nullptr indicates a zeroinitializer (a valid /// initializer, it just doesn't fit the ConstantDataArray interface). const ConstantDataArray *Array; + /// Slice starts at this Offset. uint64_t Offset; + /// Length of the slice. uint64_t Length; @@ -242,6 +245,7 @@ template class ArrayRef; Offset += Delta; Length -= Delta; } + /// Convenience accessor for elements in the slice. uint64_t operator[](unsigned I) const { return Array==nullptr ? 0 : Array->getElementAsInteger(I + Offset); @@ -378,6 +382,7 @@ template class ArrayRef; const DominatorTree *DT = nullptr); enum class OverflowResult { AlwaysOverflows, MayOverflow, NeverOverflows }; + OverflowResult computeOverflowForUnsignedMul(const Value *LHS, const Value *RHS, const DataLayout &DL, @@ -466,6 +471,7 @@ template class ArrayRef; SPF_ABS, /// Absolute value SPF_NABS /// Negated absolute value }; + /// \brief Behavior when a floating point min/max is given one NaN and one /// non-NaN as input. enum SelectPatternNaNBehavior { @@ -476,6 +482,7 @@ template class ArrayRef; /// it has been determined that no operands can /// be NaN). }; + struct SelectPatternResult { SelectPatternFlavor Flavor; SelectPatternNaNBehavior NaNBehavior; /// Only applicable if Flavor is @@ -489,6 +496,7 @@ template class ArrayRef; return !(SPF == SPF_UNKNOWN || SPF == SPF_ABS || SPF == SPF_NABS); } }; + /// Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind /// and providing the out parameter results if we successfully match. /// @@ -532,4 +540,4 @@ template class ArrayRef; unsigned Depth = 0); } // end namespace llvm -#endif +#endif // LLVM_ANALYSIS_VALUETRACKING_H diff --git a/include/llvm/Transforms/Scalar/SROA.h b/include/llvm/Transforms/Scalar/SROA.h index 3080b75ba89..0bd592f0dae 100644 --- a/include/llvm/Transforms/Scalar/SROA.h +++ b/include/llvm/Transforms/Scalar/SROA.h @@ -17,15 +17,23 @@ #define LLVM_TRANSFORMS_SCALAR_SROA_H #include "llvm/ADT/SetVector.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/IR/PassManager.h" #include "llvm/Support/Compiler.h" #include namespace llvm { +class AllocaInst; +class AssumptionCache; +class DominatorTree; +class Function; +class Instruction; +class LLVMContext; +class PHINode; +class SelectInst; +class Use; + /// A private "module" namespace for types and utilities used by SROA. These /// are implementation details and should not be used by clients. namespace sroa LLVM_LIBRARY_VISIBILITY { diff --git a/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/include/llvm/Transforms/Vectorize/SLPVectorizer.h index a4fa3f32312..06f6fa11a94 100644 --- a/include/llvm/Transforms/Vectorize/SLPVectorizer.h +++ b/include/llvm/Transforms/Vectorize/SLPVectorizer.h @@ -1,4 +1,4 @@ -//===---- SLPVectorizer.h ---------------------------------------*- C++ -*-===// +//===- SLPVectorizer.h ------------------------------------------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -19,30 +19,48 @@ #ifndef LLVM_TRANSFORMS_VECTORIZE_SLPVECTORIZER_H #define LLVM_TRANSFORMS_VECTORIZE_SLPVECTORIZER_H +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/DemandedBits.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/OptimizationDiagnosticInfo.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/Function.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/ValueHandle.h" namespace llvm { +class AssumptionCache; +class BasicBlock; +class CmpInst; +class DataLayout; +class DemandedBits; +class DominatorTree; +class Function; +class InsertElementInst; +class InsertValueInst; +class Instruction; +class LoopInfo; +class OptimizationRemarkEmitter; +class PHINode; +class ScalarEvolution; +class StoreInst; +class TargetLibraryInfo; +class TargetTransformInfo; +class Value; + /// A private "module" namespace for types and utilities used by this pass. /// These are implementation details and should not be used by clients. namespace slpvectorizer { + class BoUpSLP; -} + +} // end namespace slpvectorizer struct SLPVectorizerPass : public PassInfoMixin { - typedef SmallVector StoreList; - typedef MapVector StoreListMap; - typedef SmallVector WeakTrackingVHList; - typedef MapVector WeakTrackingVHListMap; + using StoreList = SmallVector; + using StoreListMap = MapVector; + using WeakTrackingVHList = SmallVector; + using WeakTrackingVHListMap = MapVector; ScalarEvolution *SE = nullptr; TargetTransformInfo *TTI = nullptr; @@ -103,11 +121,14 @@ private: /// Try to vectorize trees that start at insertvalue instructions. bool vectorizeInsertValueInst(InsertValueInst *IVI, BasicBlock *BB, slpvectorizer::BoUpSLP &R); + /// Try to vectorize trees that start at insertelement instructions. bool vectorizeInsertElementInst(InsertElementInst *IEI, BasicBlock *BB, slpvectorizer::BoUpSLP &R); + /// Try to vectorize trees that start at compare instructions. bool vectorizeCmpInst(CmpInst *CI, BasicBlock *BB, slpvectorizer::BoUpSLP &R); + /// Tries to vectorize constructs started from CmpInst, InsertValueInst or /// InsertElementInst instructions. bool vectorizeSimpleInstructions(SmallVectorImpl &Instructions, @@ -128,6 +149,7 @@ private: /// The getelementptr instructions in a basic block organized by base pointer. WeakTrackingVHListMap GEPs; }; -} + +} // end namespace llvm #endif // LLVM_TRANSFORMS_VECTORIZE_SLPVECTORIZER_H diff --git a/lib/Analysis/AssumptionCache.cpp b/lib/Analysis/AssumptionCache.cpp index 3ff27890dc3..8bfd24ccf77 100644 --- a/lib/Analysis/AssumptionCache.cpp +++ b/lib/Analysis/AssumptionCache.cpp @@ -13,14 +13,26 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/AssumptionCache.h" -#include "llvm/IR/CallSite.h" -#include "llvm/IR/Dominators.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/Support/Debug.h" +#include "llvm/Pass.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +#include + using namespace llvm; using namespace llvm::PatternMatch; @@ -255,8 +267,9 @@ AssumptionCacheTracker::AssumptionCacheTracker() : ImmutablePass(ID) { initializeAssumptionCacheTrackerPass(*PassRegistry::getPassRegistry()); } -AssumptionCacheTracker::~AssumptionCacheTracker() {} +AssumptionCacheTracker::~AssumptionCacheTracker() = default; + +char AssumptionCacheTracker::ID = 0; INITIALIZE_PASS(AssumptionCacheTracker, "assumption-cache-tracker", "Assumption Cache Tracker", false, true) -char AssumptionCacheTracker::ID = 0; diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 4bf17b1502b..a9619746797 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -13,37 +13,66 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ValueTracking.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/OptimizationDiagnosticInfo.h" -#include "llvm/Analysis/VectorUtils.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/IR/Statepoint.h" -#include "llvm/Support/Debug.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/KnownBits.h" #include "llvm/Support/MathExtras.h" #include #include -#include +#include +#include +#include +#include + using namespace llvm; using namespace llvm::PatternMatch; @@ -70,6 +99,7 @@ static unsigned getBitWidth(Type *Ty, const DataLayout &DL) { } namespace { + // Simplifying using an assume can only be done in a particular control-flow // context (the context instruction provides that context). If an assume and // the context instruction are not in the same block then the DT helps in @@ -79,6 +109,7 @@ struct Query { AssumptionCache *AC; const Instruction *CxtI; const DominatorTree *DT; + // Unlike the other analyses, this may be a nullptr because not all clients // provide it currently. OptimizationRemarkEmitter *ORE; @@ -92,11 +123,12 @@ struct Query { /// isKnownNonZero, which calls computeKnownBits and isKnownToBeAPowerOfTwo /// (all of which can call computeKnownBits), and so on. std::array Excluded; - unsigned NumExcluded; + + unsigned NumExcluded = 0; Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT, OptimizationRemarkEmitter *ORE = nullptr) - : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), NumExcluded(0) {} + : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE) {} Query(const Query &Q, const Value *NewExcl) : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE), @@ -113,6 +145,7 @@ struct Query { return std::find(Excluded.begin(), End, Value) != End; } }; + } // end anonymous namespace // Given the provided Value and, potentially, a context instruction, return @@ -171,7 +204,6 @@ bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS, return (LHSKnown.Zero | RHSKnown.Zero).isAllOnesValue(); } - bool llvm::isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI) { for (const User *U : CxtI->users()) { if (const ICmpInst *IC = dyn_cast(U)) @@ -380,7 +412,9 @@ static bool isEphemeralValueOf(const Instruction *I, const Value *E) { continue; // If all uses of this value are ephemeral, then so is this value. - if (all_of(V->users(), [&](const User *U) { return EphValues.count(U); })) { + if (llvm::all_of(V->users(), [&](const User *U) { + return EphValues.count(U); + })) { if (V == E) return true; @@ -423,7 +457,6 @@ static bool isAssumeLikeIntrinsic(const Instruction *I) { bool llvm::isValidAssumeForContext(const Instruction *Inv, const Instruction *CxtI, const DominatorTree *DT) { - // There are two restrictions on the use of an assume: // 1. The assume must dominate the context (or the control flow must // reach the assume whenever it reaches the context). @@ -891,7 +924,7 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, } break; } - case Instruction::Or: { + case Instruction::Or: computeKnownBits(I->getOperand(1), Known, Depth + 1, Q); computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); @@ -900,7 +933,6 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, // Output known-1 are known to be set if set in either the LHS | RHS. Known.One |= Known2.One; break; - } case Instruction::Xor: { computeKnownBits(I->getOperand(1), Known, Depth + 1, Q); computeKnownBits(I->getOperand(0), Known2, Depth + 1, Q); @@ -1911,7 +1943,7 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { } } // Check if all incoming values are non-zero constant. - bool AllNonZeroConstants = all_of(PN->operands(), [](Value *V) { + bool AllNonZeroConstants = llvm::all_of(PN->operands(), [](Value *V) { return isa(V) && !cast(V)->isZero(); }); if (AllNonZeroConstants) @@ -2494,7 +2526,6 @@ Intrinsic::ID llvm::getIntrinsicForCallSite(ImmutableCallSite ICS, /// /// NOTE: this function will need to be revisited when we support non-default /// rounding modes! -/// bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI, unsigned Depth) { if (const ConstantFP *CFP = dyn_cast(V)) @@ -2723,7 +2754,6 @@ Value *llvm::isBytewiseValue(Value *V) { return nullptr; } - // This is the recursive version of BuildSubAggregate. It takes a few different // arguments. Idxs is the index within the nested struct From that we are // looking at now (which is of type IndexedType). IdxSkip is the number of @@ -2734,7 +2764,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType, SmallVectorImpl &Idxs, unsigned IdxSkip, Instruction *InsertBefore) { - llvm::StructType *STy = dyn_cast(IndexedType); + StructType *STy = dyn_cast(IndexedType); if (STy) { // Save the original To argument so we can modify it Value *OrigTo = To; @@ -2773,8 +2803,8 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType, return nullptr; // Insert the value in the new (sub) aggregrate - return llvm::InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip), - "tmp", InsertBefore); + return InsertValueInst::Create(To, V, makeArrayRef(Idxs).slice(IdxSkip), + "tmp", InsertBefore); } // This helper takes a nested struct and extracts a part of it (which is again a @@ -3745,7 +3775,7 @@ bool llvm::isOverflowIntrinsicNoWrap(const IntrinsicInst *II, return true; }; - return any_of(GuardingBranches, AllUsesGuardedByBranch); + return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch); } @@ -3949,7 +3979,7 @@ bool llvm::programUndefinedIfFullPoison(const Instruction *PoisonI) { } break; - }; + } return false; } diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index 24489157773..4b8ddb7cc24 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -12,37 +12,67 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/MapVector.h" -#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" -#include "llvm/Analysis/CFG.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Argument.h" +#include "llvm/IR/Attributes.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" #include "llvm/IR/Module.h" #include "llvm/IR/Statepoint.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" #include "llvm/IR/Value.h" -#include "llvm/IR/Verifier.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include #define DEBUG_TYPE "rewrite-statepoints-for-gc" @@ -53,6 +83,7 @@ static cl::opt PrintLiveSet("spp-print-liveset", cl::Hidden, cl::init(false)); static cl::opt PrintLiveSetSize("spp-print-liveset-size", cl::Hidden, cl::init(false)); + // Print out the base pointers for debugging static cl::opt PrintBasePointers("spp-print-base-pointers", cl::Hidden, cl::init(false)); @@ -68,6 +99,7 @@ static bool ClobberNonLive = true; #else static bool ClobberNonLive = false; #endif + static cl::opt ClobberNonLiveOverride("rs4gc-clobber-non-live", cl::location(ClobberNonLive), cl::Hidden); @@ -77,13 +109,16 @@ static cl::opt cl::Hidden, cl::init(true)); namespace { + struct RewriteStatepointsForGC : public ModulePass { static char ID; // Pass identification, replacement for typeid RewriteStatepointsForGC() : ModulePass(ID) { initializeRewriteStatepointsForGCPass(*PassRegistry::getPassRegistry()); } + bool runOnFunction(Function &F); + bool runOnModule(Module &M) override { bool Changed = false; for (Function &F : M) @@ -121,12 +156,14 @@ struct RewriteStatepointsForGC : public ModulePass { // Helpers for stripNonValidAttributesAndMetadata void stripNonValidAttributesAndMetadataFromBody(Function &F); void stripNonValidAttributesFromPrototype(Function &F); + // Certain metadata on instructions are invalid after running RS4GC. // Optimizations that run after RS4GC can incorrectly use this metadata to // optimize functions. We drop such metadata on the instruction. void stripInvalidMetadataFromInstruction(Instruction &I); }; -} // namespace + +} // end anonymous namespace char RewriteStatepointsForGC::ID = 0; @@ -142,9 +179,11 @@ INITIALIZE_PASS_END(RewriteStatepointsForGC, "rewrite-statepoints-for-gc", "Make relocations explicit at statepoints", false, false) namespace { + struct GCPtrLivenessData { /// Values defined in this block. MapVector> KillSet; + /// Values used in this block (and thus live); does not included values /// killed within this block. MapVector> LiveSet; @@ -168,10 +207,10 @@ struct GCPtrLivenessData { // Generally, after the execution of a full findBasePointer call, only the // base relation will remain. Internally, we add a mixture of the two // types, then update all the second type to the first type -typedef MapVector DefiningValueMapTy; -typedef SetVector StatepointLiveSetTy; -typedef MapVector, AssertingVH> - RematerializedValueMapTy; +using DefiningValueMapTy = MapVector; +using StatepointLiveSetTy = SetVector; +using RematerializedValueMapTy = + MapVector, AssertingVH>; struct PartiallyConstructedSafepointRecord { /// The set of values known to be live across this safepoint @@ -193,7 +232,8 @@ struct PartiallyConstructedSafepointRecord { /// Maps rematerialized copy to it's original value. RematerializedValueMapTy RematerializedValues; }; -} + +} // end anonymous namespace static ArrayRef GetDeoptBundleOperands(ImmutableCallSite CS) { Optional DeoptBundle = @@ -256,7 +296,7 @@ static bool containsGCPtrType(Type *Ty) { if (ArrayType *AT = dyn_cast(Ty)) return containsGCPtrType(AT->getElementType()); if (StructType *ST = dyn_cast(Ty)) - return any_of(ST->subtypes(), containsGCPtrType); + return llvm::any_of(ST->subtypes(), containsGCPtrType); return false; } @@ -301,7 +341,9 @@ analyzeParsePointLiveness(DominatorTree &DT, } static bool isKnownBaseResult(Value *V); + namespace { + /// A single base defining value - An immediate base defining value for an /// instruction 'Def' is an input to 'Def' whose base is also a base of 'Def'. /// For instructions which have multiple pointer [vector] inputs or that @@ -313,9 +355,11 @@ namespace { struct BaseDefiningValueResult { /// Contains the value which is the base defining value. Value * const BDV; + /// True if the base defining value is also known to be an actual base /// pointer. const bool IsKnownBase; + BaseDefiningValueResult(Value *BDV, bool IsKnownBase) : BDV(BDV), IsKnownBase(IsKnownBase) { #ifndef NDEBUG @@ -326,7 +370,8 @@ struct BaseDefiningValueResult { #endif } }; -} + +} // end anonymous namespace static BaseDefiningValueResult findBaseDefiningValue(Value *I); @@ -431,7 +476,6 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) { if (isa(I)) // The value loaded is an gc base itself return BaseDefiningValueResult(I, true); - if (GetElementPtrInst *GEP = dyn_cast(I)) // The base of this GEP is the base @@ -444,12 +488,11 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) { break; case Intrinsic::experimental_gc_statepoint: llvm_unreachable("statepoints don't produce pointers"); - case Intrinsic::experimental_gc_relocate: { + case Intrinsic::experimental_gc_relocate: // Rerunning safepoint insertion after safepoints are already // inserted is not supported. It could probably be made to work, // but why are you doing this? There's no good reason. llvm_unreachable("repeat safepoint insertion is not supported"); - } case Intrinsic::gcroot: // Currently, this mechanism hasn't been extended to work with gcroot. // There's no reason it couldn't be, but I haven't thought about the @@ -553,6 +596,7 @@ static bool isKnownBaseResult(Value *V) { } namespace { + /// Models the state of a single base defining value in the findBasePointer /// algorithm for determining where a new instruction is needed to propagate /// the base of this BDV. @@ -560,7 +604,7 @@ class BDVState { public: enum Status { Unknown, Base, Conflict }; - BDVState() : Status(Unknown), BaseValue(nullptr) {} + BDVState() : BaseValue(nullptr) {} explicit BDVState(Status Status, Value *BaseValue = nullptr) : Status(Status), BaseValue(BaseValue) { @@ -599,16 +643,17 @@ public: case Conflict: OS << "C"; break; - }; + } OS << " (" << getBaseValue() << " - " << (getBaseValue() ? getBaseValue()->getName() : "nullptr") << "): "; } private: - Status Status; + Status Status = Unknown; AssertingVH BaseValue; // Non-null only if Status == Base. }; -} + +} // end anonymous namespace #ifndef NDEBUG static raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) { @@ -1171,7 +1216,7 @@ static void CreateGCRelocates(ArrayRef LiveVariables, return; auto FindIndex = [](ArrayRef LiveVec, Value *Val) { - auto ValIt = find(LiveVec, Val); + auto ValIt = llvm::find(LiveVec, Val); assert(ValIt != LiveVec.end() && "Val not found in LiveVec!"); size_t Index = std::distance(LiveVec.begin(), ValIt); assert(Index < LiveVec.size() && "Bug in std::find?"); @@ -1231,7 +1276,7 @@ class DeferredReplacement { AssertingVH New; bool IsDeoptimize = false; - DeferredReplacement() {} + DeferredReplacement() = default; public: static DeferredReplacement createRAUW(Instruction *Old, Instruction *New) { @@ -1288,7 +1333,8 @@ public: OldI->eraseFromParent(); } }; -} + +} // end anonymous namespace static StringRef getDeoptLowering(CallSite CS) { const char *DeoptLowering = "deopt-lowering"; @@ -1306,7 +1352,6 @@ static StringRef getDeoptLowering(CallSite CS) { return "live-through"; } - static void makeStatepointExplicitImpl(const CallSite CS, /* to replace */ const SmallVectorImpl &BasePtrs, @@ -1530,7 +1575,6 @@ static void insertRelocationStores(iterator_range GCRelocs, DenseMap &AllocaMap, DenseSet &VisitedLiveValues) { - for (User *U : GCRelocs) { GCRelocateInst *Relocate = dyn_cast(U); if (!Relocate) @@ -1566,7 +1610,6 @@ static void insertRematerializationStores( const RematerializedValueMapTy &RematerializedValues, DenseMap &AllocaMap, DenseSet &VisitedLiveValues) { - for (auto RematerializedValuePair: RematerializedValues) { Instruction *RematerializedValue = RematerializedValuePair.first; Value *OriginalValue = RematerializedValuePair.second; @@ -1832,7 +1875,6 @@ static void findLiveReferences( static Value* findRematerializableChainToBasePointer( SmallVectorImpl &ChainToBase, Value *CurrentValue) { - if (GetElementPtrInst *GEP = dyn_cast(CurrentValue)) { ChainToBase.push_back(GEP); return findRematerializableChainToBasePointer(ChainToBase, @@ -1888,7 +1930,6 @@ chainToBasePointerCost(SmallVectorImpl &Chain, } static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPhi) { - unsigned PhiNum = OrigRootPhi.getNumIncomingValues(); if (PhiNum != AlternateRootPhi.getNumIncomingValues() || OrigRootPhi.getParent() != AlternateRootPhi.getParent()) @@ -1912,7 +1953,6 @@ static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPh return false; } return true; - } // From the statepoint live set pick values that are cheaper to recompute then @@ -2313,7 +2353,6 @@ RewriteStatepointsForGC::stripNonValidAttributesFromPrototype(Function &F) { } void RewriteStatepointsForGC::stripInvalidMetadataFromInstruction(Instruction &I) { - if (!isa(I) && !isa(I)) return; // These are the attributes that are still valid on loads and stores after @@ -2339,7 +2378,6 @@ void RewriteStatepointsForGC::stripInvalidMetadataFromInstruction(Instruction &I // Drops all metadata on the instruction other than ValidMetadataAfterRS4GC. I.dropUnknownNonDebugMetadata(ValidMetadataAfterRS4GC); - } void RewriteStatepointsForGC::stripNonValidAttributesAndMetadataFromBody(Function &F) { @@ -2349,7 +2387,6 @@ void RewriteStatepointsForGC::stripNonValidAttributesAndMetadataFromBody(Functio LLVMContext &Ctx = F.getContext(); MDBuilder Builder(Ctx); - for (Instruction &I : instructions(F)) { if (const MDNode *MD = I.getMetadata(LLVMContext::MD_tbaa)) { assert(MD->getNumOperands() < 5 && "unrecognized metadata shape!"); @@ -2398,7 +2435,7 @@ static bool shouldRewriteStatepointsIn(Function &F) { void RewriteStatepointsForGC::stripNonValidAttributesAndMetadata(Module &M) { #ifndef NDEBUG - assert(any_of(M, shouldRewriteStatepointsIn) && "precondition!"); + assert(llvm::any_of(M, shouldRewriteStatepointsIn) && "precondition!"); #endif for (Function &F : M) @@ -2666,7 +2703,6 @@ static void computeLiveInValues(DominatorTree &DT, Function &F, static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data, StatepointLiveSetTy &Out) { - BasicBlock *BB = Inst->getParent(); // Note: The copy is intentional and required diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 458596b4c76..c96606af6bb 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -24,28 +24,53 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Scalar/SROA.h" +#include "llvm/ADT/APInt.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/PtrUseVisitor.h" -#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/ConstantFolder.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfo.h" +#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/GlobalAlias.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstVisitor.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" -#include "llvm/Support/Chrono.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" @@ -55,6 +80,17 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #ifndef NDEBUG // We only use this for a debug check. @@ -88,10 +124,12 @@ static cl::opt SROAStrictInbounds("sroa-strict-inbounds", cl::init(false), cl::Hidden); namespace { + /// \brief A custom IRBuilder inserter which prefixes all names, but only in /// Assert builds. class IRBuilderPrefixedInserter : public IRBuilderDefaultInserter { std::string Prefix; + const Twine getNameWithPrefix(const Twine &Name) const { return Name.isTriviallyEmpty() ? Name : Prefix + Name; } @@ -107,11 +145,9 @@ protected: } }; -/// \brief Provide a typedef for IRBuilder that drops names in release builds. -using IRBuilderTy = llvm::IRBuilder; -} +/// \brief Provide a type for IRBuilder that drops names in release builds. +using IRBuilderTy = IRBuilder; -namespace { /// \brief A used slice of an alloca. /// /// This structure represents a slice of an alloca used by some instruction. It @@ -120,17 +156,18 @@ namespace { /// or not when forming partitions of the alloca. class Slice { /// \brief The beginning offset of the range. - uint64_t BeginOffset; + uint64_t BeginOffset = 0; /// \brief The ending offset, not included in the range. - uint64_t EndOffset; + uint64_t EndOffset = 0; /// \brief Storage for both the use of this slice and whether it can be /// split. PointerIntPair UseAndIsSplittable; public: - Slice() : BeginOffset(), EndOffset() {} + Slice() = default; + Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable) : BeginOffset(BeginOffset), EndOffset(EndOffset), UseAndIsSplittable(U, IsSplittable) {} @@ -180,12 +217,15 @@ public: } bool operator!=(const Slice &RHS) const { return !operator==(RHS); } }; + } // end anonymous namespace namespace llvm { + template struct isPodLike; template <> struct isPodLike { static const bool value = true; }; -} + +} // end namespace llvm /// \brief Representation of the alloca slices. /// @@ -207,13 +247,15 @@ public: /// \brief Support for iterating over the slices. /// @{ - typedef SmallVectorImpl::iterator iterator; - typedef iterator_range range; + using iterator = SmallVectorImpl::iterator; + using range = iterator_range; + iterator begin() { return Slices.begin(); } iterator end() { return Slices.end(); } - typedef SmallVectorImpl::const_iterator const_iterator; - typedef iterator_range const_range; + using const_iterator = SmallVectorImpl::const_iterator; + using const_range = iterator_range; + const_iterator begin() const { return Slices.begin(); } const_iterator end() const { return Slices.end(); } /// @} @@ -264,6 +306,7 @@ public: private: template class BuilderBase; class SliceBuilder; + friend class AllocaSlices::SliceBuilder; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -320,7 +363,7 @@ private: friend class AllocaSlices; friend class AllocaSlices::partition_iterator; - typedef AllocaSlices::iterator iterator; + using iterator = AllocaSlices::iterator; /// \brief The beginning and ending offsets of the alloca for this /// partition. @@ -403,12 +446,12 @@ class AllocaSlices::partition_iterator /// \brief We also need to keep track of the maximum split end offset seen. /// FIXME: Do we really? - uint64_t MaxSplitSliceEndOffset; + uint64_t MaxSplitSliceEndOffset = 0; /// \brief Sets the partition to be empty at given iterator, and sets the /// end iterator. partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE) - : P(SI), SE(SE), MaxSplitSliceEndOffset(0) { + : P(SI), SE(SE) { // If not already at the end, advance our state to form the initial // partition. if (SI != SE) @@ -432,19 +475,21 @@ class AllocaSlices::partition_iterator // Remove the uses which have ended in the prior partition. This // cannot change the max split slice end because we just checked that // the prior partition ended prior to that max. - P.SplitTails.erase( - remove_if(P.SplitTails, - [&](Slice *S) { return S->endOffset() <= P.EndOffset; }), - P.SplitTails.end()); - assert(any_of(P.SplitTails, - [&](Slice *S) { - return S->endOffset() == MaxSplitSliceEndOffset; - }) && + P.SplitTails.erase(llvm::remove_if(P.SplitTails, + [&](Slice *S) { + return S->endOffset() <= + P.EndOffset; + }), + P.SplitTails.end()); + assert(llvm::any_of(P.SplitTails, + [&](Slice *S) { + return S->endOffset() == MaxSplitSliceEndOffset; + }) && "Could not find the current max split slice offset!"); - assert(all_of(P.SplitTails, - [&](Slice *S) { - return S->endOffset() <= MaxSplitSliceEndOffset; - }) && + assert(llvm::all_of(P.SplitTails, + [&](Slice *S) { + return S->endOffset() <= MaxSplitSliceEndOffset; + }) && "Max split slice end offset is not actually the max!"); } } @@ -608,7 +653,8 @@ static Value *foldPHINodeOrSelectInst(Instruction &I) { class AllocaSlices::SliceBuilder : public PtrUseVisitor { friend class PtrUseVisitor; friend class InstVisitor; - typedef PtrUseVisitor Base; + + using Base = PtrUseVisitor; const uint64_t AllocSize; AllocaSlices &AS; @@ -996,8 +1042,9 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI) return; } - Slices.erase(remove_if(Slices, [](const Slice &S) { return S.isDead(); }), - Slices.end()); + Slices.erase( + llvm::remove_if(Slices, [](const Slice &S) { return S.isDead(); }), + Slices.end()); #ifndef NDEBUG if (SROARandomShuffleSlices) { @@ -1820,11 +1867,12 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) { // do that until all the backends are known to produce good code for all // integer vector types. if (!HaveCommonEltTy) { - CandidateTys.erase(remove_if(CandidateTys, - [](VectorType *VTy) { - return !VTy->getElementType()->isIntegerTy(); - }), - CandidateTys.end()); + CandidateTys.erase( + llvm::remove_if(CandidateTys, + [](VectorType *VTy) { + return !VTy->getElementType()->isIntegerTy(); + }), + CandidateTys.end()); // If there were no integer vector types, give up. if (CandidateTys.empty()) @@ -2151,8 +2199,9 @@ static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V, class llvm::sroa::AllocaSliceRewriter : public InstVisitor { // Befriend the base class so it can delegate to private visit methods. - friend class llvm::InstVisitor; - typedef llvm::InstVisitor Base; + friend class InstVisitor; + + using Base = InstVisitor; const DataLayout &DL; AllocaSlices &AS; @@ -2182,16 +2231,18 @@ class llvm::sroa::AllocaSliceRewriter // The original offset of the slice currently being rewritten relative to // the original alloca. - uint64_t BeginOffset, EndOffset; + uint64_t BeginOffset = 0; + uint64_t EndOffset = 0; + // The new offsets of the slice currently being rewritten relative to the // original alloca. uint64_t NewBeginOffset, NewEndOffset; uint64_t SliceSize; - bool IsSplittable; - bool IsSplit; - Use *OldUse; - Instruction *OldPtr; + bool IsSplittable = false; + bool IsSplit = false; + Use *OldUse = nullptr; + Instruction *OldPtr = nullptr; // Track post-rewrite users which are PHI nodes and Selects. SmallSetVector &PHIUsers; @@ -2221,8 +2272,7 @@ public: VecTy(PromotableVecTy), ElementTy(VecTy ? VecTy->getElementType() : nullptr), ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy) / 8 : 0), - BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(), - OldPtr(), PHIUsers(PHIUsers), SelectUsers(SelectUsers), + PHIUsers(PHIUsers), SelectUsers(SelectUsers), IRB(NewAI.getContext(), ConstantFolder()) { if (VecTy) { assert((DL.getTypeSizeInBits(ElementTy) % 8) == 0 && @@ -2987,6 +3037,7 @@ private: }; namespace { + /// \brief Visitor to rewrite aggregate loads and stores as scalar. /// /// This pass aggressively rewrites all aggregate loads and stores on @@ -2994,7 +3045,7 @@ namespace { /// with scalar loads and stores. class AggLoadStoreRewriter : public InstVisitor { // Befriend the base class so it can delegate to private visit methods. - friend class llvm::InstVisitor; + friend class InstVisitor; /// Queue of pointer uses to analyze and potentially rewrite. SmallVector Queue; @@ -3037,12 +3088,15 @@ private: protected: /// The builder used to form new instructions. IRBuilderTy IRB; + /// The indices which to be used with insert- or extractvalue to select the /// appropriate value within the aggregate. SmallVector Indices; + /// The indices to a GEP instruction which will move Ptr to the correct slot /// within the aggregate. SmallVector GEPIndices; + /// The base pointer of the original op, used as a base for GEPing the /// split operations. Value *Ptr; @@ -3193,7 +3247,8 @@ private: return false; } }; -} + +} // end anonymous namespace /// \brief Strip aggregate type wrapping. /// @@ -3485,58 +3540,60 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { // match relative to their starting offset. We have to verify this prior to // any rewriting. Stores.erase( - remove_if(Stores, - [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) { - // Lookup the load we are storing in our map of split - // offsets. - auto *LI = cast(SI->getValueOperand()); - // If it was completely unsplittable, then we're done, - // and this store can't be pre-split. - if (UnsplittableLoads.count(LI)) - return true; - - auto LoadOffsetsI = SplitOffsetsMap.find(LI); - if (LoadOffsetsI == SplitOffsetsMap.end()) - return false; // Unrelated loads are definitely safe. - auto &LoadOffsets = LoadOffsetsI->second; - - // Now lookup the store's offsets. - auto &StoreOffsets = SplitOffsetsMap[SI]; - - // If the relative offsets of each split in the load and - // store match exactly, then we can split them and we - // don't need to remove them here. - if (LoadOffsets.Splits == StoreOffsets.Splits) - return false; - - DEBUG(dbgs() << " Mismatched splits for load and store:\n" - << " " << *LI << "\n" - << " " << *SI << "\n"); - - // We've found a store and load that we need to split - // with mismatched relative splits. Just give up on them - // and remove both instructions from our list of - // candidates. - UnsplittableLoads.insert(LI); - return true; - }), + llvm::remove_if(Stores, + [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) { + // Lookup the load we are storing in our map of split + // offsets. + auto *LI = cast(SI->getValueOperand()); + // If it was completely unsplittable, then we're done, + // and this store can't be pre-split. + if (UnsplittableLoads.count(LI)) + return true; + + auto LoadOffsetsI = SplitOffsetsMap.find(LI); + if (LoadOffsetsI == SplitOffsetsMap.end()) + return false; // Unrelated loads are definitely safe. + auto &LoadOffsets = LoadOffsetsI->second; + + // Now lookup the store's offsets. + auto &StoreOffsets = SplitOffsetsMap[SI]; + + // If the relative offsets of each split in the load and + // store match exactly, then we can split them and we + // don't need to remove them here. + if (LoadOffsets.Splits == StoreOffsets.Splits) + return false; + + DEBUG(dbgs() + << " Mismatched splits for load and store:\n" + << " " << *LI << "\n" + << " " << *SI << "\n"); + + // We've found a store and load that we need to split + // with mismatched relative splits. Just give up on them + // and remove both instructions from our list of + // candidates. + UnsplittableLoads.insert(LI); + return true; + }), Stores.end()); // Now we have to go *back* through all the stores, because a later store may // have caused an earlier store's load to become unsplittable and if it is // unsplittable for the later store, then we can't rely on it being split in // the earlier store either. - Stores.erase(remove_if(Stores, - [&UnsplittableLoads](StoreInst *SI) { - auto *LI = cast(SI->getValueOperand()); - return UnsplittableLoads.count(LI); - }), + Stores.erase(llvm::remove_if(Stores, + [&UnsplittableLoads](StoreInst *SI) { + auto *LI = + cast(SI->getValueOperand()); + return UnsplittableLoads.count(LI); + }), Stores.end()); // Once we've established all the loads that can't be split for some reason, // filter any that made it into our list out. - Loads.erase(remove_if(Loads, - [&UnsplittableLoads](LoadInst *LI) { - return UnsplittableLoads.count(LI); - }), + Loads.erase(llvm::remove_if(Loads, + [&UnsplittableLoads](LoadInst *LI) { + return UnsplittableLoads.count(LI); + }), Loads.end()); // If no loads or stores are left, there is no pre-splitting to be done for @@ -3804,7 +3861,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { } // Remove the killed slices that have ben pre-split. - AS.erase(remove_if(AS, [](const Slice &S) { return S.isDead(); }), AS.end()); + AS.erase(llvm::remove_if(AS, [](const Slice &S) { return S.isDead(); }), + AS.end()); // Insert our new slices. This will sort and merge them into the sorted // sequence. @@ -3819,7 +3877,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) { // Finally, don't try to promote any allocas that new require re-splitting. // They have already been added to the worklist above. PromotableAllocas.erase( - remove_if( + llvm::remove_if( PromotableAllocas, [&](AllocaInst *AI) { return ResplitPromotableAllocas.count(AI); }), PromotableAllocas.end()); @@ -4256,7 +4314,7 @@ PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT, auto IsInSet = [&](AllocaInst *AI) { return DeletedAllocas.count(AI); }; Worklist.remove_if(IsInSet); PostPromotionWorklist.remove_if(IsInSet); - PromotableAllocas.erase(remove_if(PromotableAllocas, IsInSet), + PromotableAllocas.erase(llvm::remove_if(PromotableAllocas, IsInSet), PromotableAllocas.end()); DeletedAllocas.clear(); } @@ -4291,9 +4349,12 @@ class llvm::sroa::SROALegacyPass : public FunctionPass { SROA Impl; public: + static char ID; + SROALegacyPass() : FunctionPass(ID) { initializeSROALegacyPassPass(*PassRegistry::getPassRegistry()); } + bool runOnFunction(Function &F) override { if (skipFunction(F)) return false; @@ -4303,6 +4364,7 @@ public: getAnalysis().getAssumptionCache(F)); return !PA.areAllPreserved(); } + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); @@ -4311,7 +4373,6 @@ public: } StringRef getPassName() const override { return "SROA"; } - static char ID; }; char SROALegacyPass::ID = 0; diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp index 147860db400..b147445d716 100644 --- a/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -15,38 +15,86 @@ // "Loop-Aware SLP in GCC" by Ira Rosen, Dorit Nuzman, Ayal Zaks. // //===----------------------------------------------------------------------===// + #include "llvm/Transforms/Vectorize/SLPVectorizer.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/iterator.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/DemandedBits.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/VectorUtils.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugLoc.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/IR/NoFolder.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/PassManager.h" #include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/User.h" #include "llvm/IR/Value.h" +#include "llvm/IR/ValueHandle.h" #include "llvm/IR/Verifier.h" #include "llvm/Pass.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/DOTGraphTraits.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/KnownBits.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Vectorize.h" #include +#include +#include +#include #include +#include +#include +#include +#include +#include using namespace llvm; using namespace llvm::PatternMatch; @@ -382,7 +430,6 @@ static bool matchExtractIndex(Instruction *E, unsigned Idx, unsigned Opcode) { /// possible scalar operand in vectorized instruction. static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst, TargetLibraryInfo *TLI) { - unsigned Opcode = UserInst->getOpcode(); switch (Opcode) { case Instruction::Load: { @@ -427,24 +474,25 @@ static bool isSimple(Instruction *I) { } namespace llvm { + namespace slpvectorizer { + /// Bottom Up SLP Vectorizer. class BoUpSLP { public: - typedef SmallVector ValueList; - typedef SmallVector InstrList; - typedef SmallPtrSet ValueSet; - typedef SmallVector StoreList; - typedef MapVector> - ExtraValueToDebugLocsMap; + using ValueList = SmallVector; + using InstrList = SmallVector; + using ValueSet = SmallPtrSet; + using StoreList = SmallVector; + using ExtraValueToDebugLocsMap = + MapVector>; BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li, DominatorTree *Dt, AssumptionCache *AC, DemandedBits *DB, const DataLayout *DL, OptimizationRemarkEmitter *ORE) - : NumLoadsWantToKeepOrder(0), NumLoadsWantToChangeOrder(0), F(Func), - SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), AC(AC), DB(DB), - DL(DL), ORE(ORE), Builder(Se->getContext()) { + : F(Func), SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), AC(AC), + DB(DB), DL(DL), ORE(ORE), Builder(Se->getContext()) { CodeMetrics::collectEphemeralValues(F, AC, EphValues); // Use the vector register size specified by the target unless overridden // by a command-line option. @@ -466,6 +514,7 @@ public: /// \brief Vectorize the tree that starts with the elements in \p VL. /// Returns the vectorized root. Value *vectorizeTree(); + /// Vectorize the tree but with the list of externally used values \p /// ExternallyUsedValues. Values in this MapVector can be replaced but the /// generated extractvalue instructions. @@ -483,6 +532,7 @@ public: /// the purpose of scheduling and extraction in the \p UserIgnoreLst. void buildTree(ArrayRef Roots, ArrayRef UserIgnoreLst = None); + /// Construct a vectorizable tree that starts at \p Roots, ignoring users for /// the purpose of scheduling and extraction in the \p UserIgnoreLst taking /// into account (anf updating it, if required) list of externally used @@ -599,15 +649,14 @@ private: void reorderAltShuffleOperands(unsigned Opcode, ArrayRef VL, SmallVectorImpl &Left, SmallVectorImpl &Right); + /// \reorder commutative operands to get better probability of /// generating vectorized code. void reorderInputsAccordingToOpcode(unsigned Opcode, ArrayRef VL, SmallVectorImpl &Left, SmallVectorImpl &Right); struct TreeEntry { - TreeEntry(std::vector &Container) - : Scalars(), VectorizedValue(nullptr), NeedToGather(0), - Container(Container) {} + TreeEntry(std::vector &Container) : Container(Container) {} /// \returns true if the scalars in VL are equal to this entry. bool isSame(ArrayRef VL) const { @@ -619,10 +668,10 @@ private: ValueList Scalars; /// The Scalars are vectorized into this value. It is initialized to Null. - Value *VectorizedValue; + Value *VectorizedValue = nullptr; /// Do we need to gather this sequence ? - bool NeedToGather; + bool NeedToGather = false; /// Points back to the VectorizableTree. /// @@ -686,16 +735,19 @@ private: /// This POD struct describes one external user in the vectorized tree. struct ExternalUser { - ExternalUser (Value *S, llvm::User *U, int L) : - Scalar(S), User(U), Lane(L){} + ExternalUser(Value *S, llvm::User *U, int L) + : Scalar(S), User(U), Lane(L) {} + // Which scalar in our function. Value *Scalar; + // Which user that uses the scalar. llvm::User *User; + // Which lane does the scalar belong to. int Lane; }; - typedef SmallVector UserList; + using UserList = SmallVector; /// Checks if two instructions may access the same memory. /// @@ -703,7 +755,6 @@ private: /// is invariant in the calling loop. bool isAliased(const MemoryLocation &Loc1, Instruction *Inst1, Instruction *Inst2) { - // First check if the result is already in the cache. AliasCacheKey key = std::make_pair(Inst1, Inst2); Optional &result = AliasCache[key]; @@ -721,7 +772,7 @@ private: return aliased; } - typedef std::pair AliasCacheKey; + using AliasCacheKey = std::pair; /// Cache for alias results. /// TODO: consider moving this to the AliasAnalysis itself. @@ -754,6 +805,7 @@ private: /// Holds all of the instructions that we gathered. SetVector GatherSeq; + /// A list of blocks that we are going to CSE. SetVector CSEBlocks; @@ -762,17 +814,11 @@ private: /// instruction bundle (= a group of instructions which is combined into a /// vector instruction). struct ScheduleData { - // The initial value for the dependency counters. It means that the // dependencies are not calculated yet. enum { InvalidDeps = -1 }; - ScheduleData() - : Inst(nullptr), FirstInBundle(nullptr), NextInBundle(nullptr), - NextLoadStore(nullptr), SchedulingRegionID(0), SchedulingPriority(0), - Dependencies(InvalidDeps), UnscheduledDeps(InvalidDeps), - UnscheduledDepsInBundle(InvalidDeps), IsScheduled(false), - OpValue(nullptr) {} + ScheduleData() = default; void init(int BlockSchedulingRegionID, Value *OpVal) { FirstInBundle = this; @@ -842,19 +888,19 @@ private: } } - Instruction *Inst; + Instruction *Inst = nullptr; /// Points to the head in an instruction bundle (and always to this for /// single instructions). - ScheduleData *FirstInBundle; + ScheduleData *FirstInBundle = nullptr; /// Single linked list of all instructions in a bundle. Null if it is a /// single instruction. - ScheduleData *NextInBundle; + ScheduleData *NextInBundle = nullptr; /// Single linked list of all memory instructions (e.g. load, store, call) /// in the block - until the end of the scheduling region. - ScheduleData *NextLoadStore; + ScheduleData *NextLoadStore = nullptr; /// The dependent memory instructions. /// This list is derived on demand in calculateDependencies(). @@ -862,34 +908,33 @@ private: /// This ScheduleData is in the current scheduling region if this matches /// the current SchedulingRegionID of BlockScheduling. - int SchedulingRegionID; + int SchedulingRegionID = 0; /// Used for getting a "good" final ordering of instructions. - int SchedulingPriority; + int SchedulingPriority = 0; /// The number of dependencies. Constitutes of the number of users of the /// instruction plus the number of dependent memory instructions (if any). /// This value is calculated on demand. /// If InvalidDeps, the number of dependencies is not calculated yet. - /// - int Dependencies; + int Dependencies = InvalidDeps; /// The number of dependencies minus the number of dependencies of scheduled /// instructions. As soon as this is zero, the instruction/bundle gets ready /// for scheduling. /// Note that this is negative as long as Dependencies is not calculated. - int UnscheduledDeps; + int UnscheduledDeps = InvalidDeps; /// The sum of UnscheduledDeps in a bundle. Equals to UnscheduledDeps for /// single instructions. - int UnscheduledDepsInBundle; + int UnscheduledDepsInBundle = InvalidDeps; /// True if this instruction is scheduled (or considered as scheduled in the /// dry-run). - bool IsScheduled; + bool IsScheduled = false; /// Opcode of the current instruction in the schedule data. - Value *OpValue; + Value *OpValue = nullptr; }; #ifndef NDEBUG @@ -903,18 +948,9 @@ private: friend struct DOTGraphTraits; /// Contains all scheduling data for a basic block. - /// struct BlockScheduling { - BlockScheduling(BasicBlock *BB) - : BB(BB), ChunkSize(BB->size()), ChunkPos(ChunkSize), - ScheduleStart(nullptr), ScheduleEnd(nullptr), - FirstLoadStoreInRegion(nullptr), LastLoadStoreInRegion(nullptr), - ScheduleRegionSize(0), - ScheduleRegionSizeLimit(ScheduleRegionSizeBudget), - // Make sure that the initial SchedulingRegionID is greater than the - // initial SchedulingRegionID in ScheduleData (which is 0). - SchedulingRegionID(1) {} + : BB(BB), ChunkSize(BB->size()), ChunkPos(ChunkSize) {} void clear() { ReadyInsts.clear(); @@ -1090,28 +1126,30 @@ private: ReadyList ReadyInsts; /// The first instruction of the scheduling region. - Instruction *ScheduleStart; + Instruction *ScheduleStart = nullptr; /// The first instruction _after_ the scheduling region. - Instruction *ScheduleEnd; + Instruction *ScheduleEnd = nullptr; /// The first memory accessing instruction in the scheduling region /// (can be null). - ScheduleData *FirstLoadStoreInRegion; + ScheduleData *FirstLoadStoreInRegion = nullptr; /// The last memory accessing instruction in the scheduling region /// (can be null). - ScheduleData *LastLoadStoreInRegion; + ScheduleData *LastLoadStoreInRegion = nullptr; /// The current size of the scheduling region. - int ScheduleRegionSize; + int ScheduleRegionSize = 0; /// The maximum size allowed for the scheduling region. - int ScheduleRegionSizeLimit; + int ScheduleRegionSizeLimit = ScheduleRegionSizeBudget; /// The ID of the scheduling region. For a new vectorization iteration this /// is incremented which "removes" all ScheduleData from the region. - int SchedulingRegionID; + int SchedulingRegionID = 1; + // Make sure that the initial SchedulingRegionID is greater than the + // initial SchedulingRegionID in ScheduleData (which is 0). }; /// Attaches the BlockScheduling structures to basic blocks. @@ -1125,10 +1163,10 @@ private: ArrayRef UserIgnoreList; // Number of load bundles that contain consecutive loads. - int NumLoadsWantToKeepOrder; + int NumLoadsWantToKeepOrder = 0; // Number of load bundles that contain consecutive loads in reversed order. - int NumLoadsWantToChangeOrder; + int NumLoadsWantToChangeOrder = 0; // Analysis and block reference. Function *F; @@ -1155,20 +1193,20 @@ private: /// original width. MapVector> MinBWs; }; + } // end namespace slpvectorizer template <> struct GraphTraits { - typedef BoUpSLP::TreeEntry TreeEntry; + using TreeEntry = BoUpSLP::TreeEntry; /// NodeRef has to be a pointer per the GraphWriter. - typedef TreeEntry *NodeRef; + using NodeRef = TreeEntry *; /// \brief Add the VectorizableTree to the index iterator to be able to return /// TreeEntry pointers. struct ChildIteratorType : public iterator_adaptor_base::iterator> { - std::vector &VectorizableTree; ChildIteratorType(SmallVector::iterator W, @@ -1183,17 +1221,19 @@ template <> struct GraphTraits { static ChildIteratorType child_begin(NodeRef N) { return {N->UserTreeIndices.begin(), N->Container}; } + static ChildIteratorType child_end(NodeRef N) { return {N->UserTreeIndices.end(), N->Container}; } /// For the node iterator we just need to turn the TreeEntry iterator into a /// TreeEntry* iterator so that it dereferences to NodeRef. - typedef pointer_iterator::iterator> nodes_iterator; + using nodes_iterator = pointer_iterator::iterator>; static nodes_iterator nodes_begin(BoUpSLP *R) { return nodes_iterator(R->VectorizableTree.begin()); } + static nodes_iterator nodes_end(BoUpSLP *R) { return nodes_iterator(R->VectorizableTree.end()); } @@ -1202,7 +1242,7 @@ template <> struct GraphTraits { }; template <> struct DOTGraphTraits : public DefaultDOTGraphTraits { - typedef BoUpSLP::TreeEntry TreeEntry; + using TreeEntry = BoUpSLP::TreeEntry; DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} @@ -1239,6 +1279,7 @@ void BoUpSLP::buildTree(ArrayRef Roots, ExtraValueToDebugLocsMap ExternallyUsedValues; buildTree(Roots, ExternallyUsedValues, UserIgnoreLst); } + void BoUpSLP::buildTree(ArrayRef Roots, ExtraValueToDebugLocsMap &ExternallyUsedValues, ArrayRef UserIgnoreLst) { @@ -1627,7 +1668,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, case Instruction::AShr: case Instruction::And: case Instruction::Or: - case Instruction::Xor: { + case Instruction::Xor: newTreeEntry(VL, true, UserTreeIdx); DEBUG(dbgs() << "SLP: added a vector of bin op.\n"); @@ -1650,7 +1691,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, buildTree_rec(Operands, Depth + 1, UserTreeIdx); } return; - } + case Instruction::GetElementPtr: { // We don't combine GEPs with complicated (nested) indexing. for (unsigned j = 0; j < VL.size(); ++j) { @@ -1784,7 +1825,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, } return; } - case Instruction::ShuffleVector: { + case Instruction::ShuffleVector: // If this is not an alternate sequence of opcode like add-sub // then do not vectorize this instruction. if (!isAltShuffle) { @@ -1814,7 +1855,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, buildTree_rec(Operands, Depth + 1, UserTreeIdx); } return; - } + default: BS.cancelScheduling(VL, VL0); newTreeEntry(VL, false, UserTreeIdx); @@ -1942,11 +1983,11 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { assert(Opcode && allSameType(VL) && allSameBlock(VL) && "Invalid VL"); Instruction *VL0 = cast(VL[0]); switch (Opcode) { - case Instruction::PHI: { + case Instruction::PHI: return 0; - } + case Instruction::ExtractValue: - case Instruction::ExtractElement: { + case Instruction::ExtractElement: if (canReuseExtract(VL, VL0)) { int DeadCost = 0; for (unsigned i = 0, e = VL.size(); i < e; ++i) { @@ -1962,7 +2003,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) { return -DeadCost; } return getGatherCost(VecTy); - } + case Instruction::ZExt: case Instruction::SExt: case Instruction::FPToUI: @@ -2173,7 +2214,6 @@ bool BoUpSLP::isFullyVectorizableTinyTree() { } bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() { - // We can vectorize the tree if its size is greater than or equal to the // minimum size specified by the MinTreeSize command line option. if (VectorizableTree.size() >= MinTreeSize) @@ -2465,8 +2505,7 @@ void BoUpSLP::reorderInputsAccordingToOpcode(unsigned Opcode, ArrayRef VL, SmallVectorImpl &Left, SmallVectorImpl &Right) { - - if (VL.size()) { + if (!VL.empty()) { // Peel the first iteration out of the loop since there's nothing // interesting to do anyway and it simplifies the checks in the loop. auto *I = cast(VL[0]); @@ -2556,14 +2595,13 @@ void BoUpSLP::reorderInputsAccordingToOpcode(unsigned Opcode, } void BoUpSLP::setInsertPointAfterBundle(ArrayRef VL, Value *OpValue) { - // Get the basic block this bundle is in. All instructions in the bundle // should be in this block. auto *Front = cast(OpValue); auto *BB = Front->getParent(); const unsigned Opcode = cast(OpValue)->getOpcode(); const unsigned AltOpcode = getAltOpcode(Opcode); - assert(all_of(make_range(VL.begin(), VL.end()), [=](Value *V) -> bool { + assert(llvm::all_of(make_range(VL.begin(), VL.end()), [=](Value *V) -> bool { return !sameOpcodeOrAlt(Opcode, AltOpcode, cast(V)->getOpcode()) || cast(V)->getParent() == BB; @@ -3082,7 +3120,6 @@ Value *BoUpSLP::vectorizeTree() { Value * BoUpSLP::vectorizeTree(ExtraValueToDebugLocsMap &ExternallyUsedValues) { - // All blocks must be scheduled before any instructions are inserted. for (auto &BSIter : BlocksSchedules) { scheduleBlock(BSIter.second.get()); @@ -3482,7 +3519,7 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V, BasicBlock::reverse_iterator UpperEnd = BB->rend(); BasicBlock::iterator DownIter = ScheduleEnd->getIterator(); BasicBlock::iterator LowerEnd = BB->end(); - for (;;) { + while (true) { if (++ScheduleRegionSize > ScheduleRegionSizeLimit) { DEBUG(dbgs() << "SLP: exceeded schedule region size limit\n"); return false; @@ -3696,7 +3733,6 @@ void BoUpSLP::BlockScheduling::resetSchedule() { } void BoUpSLP::scheduleBlock(BlockScheduling *BS) { - if (!BS->ScheduleStart) return; @@ -3828,7 +3864,6 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) { static bool collectValuesToDemote(Value *V, SmallPtrSetImpl &Expr, SmallVectorImpl &ToDemote, SmallVectorImpl &Roots) { - // We can always demote constants. if (isa(V)) { ToDemote.push_back(V); @@ -3971,7 +4006,7 @@ void BoUpSLP::computeMinimumValueSizes() { // Determine if the sign bit of all the roots is known to be zero. If not, // IsKnownPositive is set to False. - IsKnownPositive = all_of(TreeRoot, [&](Value *R) { + IsKnownPositive = llvm::all_of(TreeRoot, [&](Value *R) { KnownBits Known = computeKnownBits(R, *DL); return Known.isNonNegative(); }); @@ -3979,7 +4014,7 @@ void BoUpSLP::computeMinimumValueSizes() { // Determine the maximum number of bits required to store the scalar // values. for (auto *Scalar : ToDemote) { - auto NumSignBits = ComputeNumSignBits(Scalar, *DL, 0, AC, 0, DT); + auto NumSignBits = ComputeNumSignBits(Scalar, *DL, 0, AC, nullptr, DT); auto NumTypeBits = DL->getTypeSizeInBits(Scalar->getType()); MaxBitWidth = std::max(NumTypeBits - NumSignBits, MaxBitWidth); } @@ -4024,6 +4059,7 @@ void BoUpSLP::computeMinimumValueSizes() { } namespace { + /// The SLPVectorizer Pass. struct SLPVectorizer : public FunctionPass { SLPVectorizerPass Impl; @@ -4035,7 +4071,6 @@ struct SLPVectorizer : public FunctionPass { initializeSLPVectorizerPass(*PassRegistry::getPassRegistry()); } - bool doInitialization(Module &M) override { return false; } @@ -4075,6 +4110,7 @@ struct SLPVectorizer : public FunctionPass { AU.setPreservesCFG(); } }; + } // end anonymous namespace PreservedAnalyses SLPVectorizerPass::run(Function &F, FunctionAnalysisManager &AM) { @@ -4221,7 +4257,9 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef Chain, BoUpSLP &R, DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n"); if (Cost < -SLPCostThreshold) { DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n"); + using namespace ore; + R.getORE()->emit(OptimizationRemark(SV_NAME, "StoresVectorized", cast(Chain[i])) << "Stores SLP vectorized with cost " << NV("Cost", Cost) @@ -4310,7 +4348,6 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef Stores, } void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) { - // Initialize the collections. We will make a single pass over the block. Stores.clear(); GEPs.clear(); @@ -4319,7 +4356,6 @@ void SLPVectorizerPass::collectSeedInstructions(BasicBlock *BB) { // Stores and GEPs according to the underlying objects of their pointer // operands. for (Instruction &I : *BB) { - // Ignore store instructions that are volatile or have a pointer operand // that doesn't point to a scalar type. if (auto *SI = dyn_cast(&I)) { @@ -4557,6 +4593,7 @@ static Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx, } namespace { + /// Model horizontal reductions. /// /// A horizontal reduction is a tree of reduction operations (currently add and @@ -4594,10 +4631,13 @@ class HorizontalReduction { struct OperationData { /// true if the operation is a reduced value, false if reduction operation. bool IsReducedValue = false; + /// Opcode of the instruction. unsigned Opcode = 0; + /// Left operand of the reduction operation. Value *LHS = nullptr; + /// Right operand of the reduction operation. Value *RHS = nullptr; @@ -4610,40 +4650,48 @@ class HorizontalReduction { public: explicit OperationData() = default; + /// Construction for reduced values. They are identified by opcode only and /// don't have associated LHS/RHS values. explicit OperationData(Value *V) : IsReducedValue(true) { if (auto *I = dyn_cast(V)) Opcode = I->getOpcode(); } + /// Constructor for binary reduction operations with opcode and its left and /// right operands. OperationData(unsigned Opcode, Value *LHS, Value *RHS) - : IsReducedValue(false), Opcode(Opcode), LHS(LHS), RHS(RHS) {} + : Opcode(Opcode), LHS(LHS), RHS(RHS) {} + explicit operator bool() const { return Opcode; } + /// Get the index of the first operand. unsigned getFirstOperandIndex() const { assert(!!*this && "The opcode is not set."); return 0; } + /// Total number of operands in the reduction operation. unsigned getNumberOfOperands() const { assert(!IsReducedValue && !!*this && LHS && RHS && "Expected reduction operation."); return 2; } + /// Expected number of uses for reduction operations/reduced values. unsigned getRequiredNumberOfUses() const { assert(!IsReducedValue && !!*this && LHS && RHS && "Expected reduction operation."); return 1; } + /// Checks if instruction is associative and can be vectorized. bool isAssociative(Instruction *I) const { assert(!IsReducedValue && *this && LHS && RHS && "Expected reduction operation."); return I->isAssociative(); } + /// Checks if the reduction operation can be vectorized. bool isVectorizable(Instruction *I) const { return isVectorizable() && isAssociative(I); @@ -4665,13 +4713,16 @@ class HorizontalReduction { LHS = nullptr; RHS = nullptr; } + /// Get the opcode of the reduction operation. unsigned getOpcode() const { assert(isVectorizable() && "Expected vectorizable operation."); return Opcode; } + Value *getLHS() const { return LHS; } Value *getRHS() const { return RHS; } + /// Creates reduction operation with the current opcode. Value *createOp(IRBuilder<> &Builder, const Twine &Name = "") const { assert(!IsReducedValue && @@ -4686,8 +4737,10 @@ class HorizontalReduction { /// The operation data of the reduction operation. OperationData ReductionData; + /// The operation data of the values we perform a reduction on. OperationData ReducedValueData; + /// Should we model this reduction as a pairwise reduction tree or a tree that /// splits the vector in halves and adds those halves. bool IsPairwiseReduction = false; @@ -5018,6 +5071,7 @@ private: return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0)); } }; + } // end anonymous namespace /// \brief Recognize construction of vectors like @@ -5425,7 +5479,6 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) { bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) { auto Changed = false; for (auto &Entry : GEPs) { - // If the getelementptr list has fewer than two elements, there's nothing // to do. if (Entry.second.size() < 2) @@ -5530,7 +5583,9 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) { } char SLPVectorizer::ID = 0; + static const char lv_name[] = "SLP Vectorizer"; + INITIALIZE_PASS_BEGIN(SLPVectorizer, SV_NAME, lv_name, false, false) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) @@ -5541,6 +5596,4 @@ INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) INITIALIZE_PASS_END(SLPVectorizer, SV_NAME, lv_name, false, false) -namespace llvm { -Pass *createSLPVectorizerPass() { return new SLPVectorizer(); } -} +Pass *llvm::createSLPVectorizerPass() { return new SLPVectorizer(); }