// pass. It should be easy to create an analysis pass around it if there
// is a need (but D45420 needs to happen first).
//
+#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IntrinsicInst.h"
namespace llvm {
-#ifndef NDEBUG
-static void debugVectorizationFailure(const StringRef DebugMsg,
- Instruction *I) {
- dbgs() << "LV: Not vectorizing: " << DebugMsg;
- if (I != nullptr)
- dbgs() << " " << *I;
- else
- dbgs() << '.';
- dbgs() << '\n';
-}
-#endif
-
-OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName,
- StringRef RemarkName,
- Loop *TheLoop,
- Instruction *I) {
- Value *CodeRegion = TheLoop->getHeader();
- DebugLoc DL = TheLoop->getStartLoc();
-
- if (I) {
- CodeRegion = I->getParent();
- // If there is no debug location attached to the instruction, revert back to
- // using the loop's.
- if (I->getDebugLoc())
- DL = I->getDebugLoc();
- }
-
- OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion);
- R << "loop not vectorized: ";
- return R;
-}
-
bool LoopVectorizeHints::Hint::validate(unsigned Val) {
switch (Kind) {
case HK_WIDTH:
return LAI->isUniform(V);
}
-void LoopVectorizationLegality::reportVectorizationFailure(
- const StringRef DebugMsg, const StringRef OREMsg,
- const StringRef ORETag, Instruction *I) const {
- LLVM_DEBUG(debugVectorizationFailure(DebugMsg, I));
- ORE->emit(createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(),
- ORETag, TheLoop, I) << OREMsg);
-}
-
bool LoopVectorizationLegality::canVectorizeOuterLoop() {
assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
// Store the result and return it at the end instead of exiting early, in case
if (!Br) {
reportVectorizationFailure("Unsupported basic block terminator",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood");
+ "CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
!LI->isLoopHeader(Br->getSuccessor(1))) {
reportVectorizationFailure("Unsupported conditional branch",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood");
+ "CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
TheLoop /*context outer loop*/)) {
reportVectorizationFailure("Outer loop contains divergent loops",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood");
+ "CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
if (!setupOuterLoopInductions()) {
reportVectorizationFailure("Unsupported outer loop Phi(s)",
"Unsupported outer loop Phi(s)",
- "UnsupportedPhi");
+ "UnsupportedPhi", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
!PhiTy->isPointerTy()) {
reportVectorizationFailure("Found a non-int non-pointer PHI",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood");
+ "CFGNotUnderstood", ORE, TheLoop);
return false;
}
if (Phi->getNumIncomingValues() != 2) {
reportVectorizationFailure("Found an invalid PHI",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood", Phi);
+ "CFGNotUnderstood", ORE, TheLoop, Phi);
return false;
}
reportVectorizationFailure("Found an unidentified PHI",
"value that could not be identified as "
"reduction is used outside the loop",
- "NonReductionValueUsedOutsideLoop", Phi);
+ "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
return false;
} // end of PHI handling
"library call cannot be vectorized. "
"Try compiling with -fno-math-errno, -ffast-math, "
"or similar flags",
- "CantVectorizeLibcall", CI);
+ "CantVectorizeLibcall", ORE, TheLoop, CI);
} else {
reportVectorizationFailure("Found a non-intrinsic callsite",
"call instruction cannot be vectorized",
- "CantVectorizeLibcall", CI);
+ "CantVectorizeLibcall", ORE, TheLoop, CI);
}
return false;
}
if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
reportVectorizationFailure("Found unvectorizable intrinsic",
"intrinsic instruction cannot be vectorized",
- "CantVectorizeIntrinsic", CI);
+ "CantVectorizeIntrinsic", ORE, TheLoop, CI);
return false;
}
}
isa<ExtractElementInst>(I)) {
reportVectorizationFailure("Found unvectorizable type",
"instruction return type cannot be vectorized",
- "CantVectorizeInstructionReturnType", &I);
+ "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
return false;
}
if (!VectorType::isValidElementType(T)) {
reportVectorizationFailure("Store instruction cannot be vectorized",
"store instruction cannot be vectorized",
- "CantVectorizeStore", ST);
+ "CantVectorizeStore", ORE, TheLoop, ST);
return false;
}
reportVectorizationFailure(
"nontemporal store instruction cannot be vectorized",
"nontemporal store instruction cannot be vectorized",
- "CantVectorizeNontemporalStore", ST);
+ "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
return false;
}
}
reportVectorizationFailure(
"nontemporal load instruction cannot be vectorized",
"nontemporal load instruction cannot be vectorized",
- "CantVectorizeNontemporalLoad", LD);
+ "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
return false;
}
}
}
reportVectorizationFailure("Value cannot be used outside the loop",
"value cannot be used outside the loop",
- "ValueUsedOutsideLoop", &I);
+ "ValueUsedOutsideLoop", ORE, TheLoop, &I);
return false;
}
} // next instr.
if (Inductions.empty()) {
reportVectorizationFailure("Did not find one integer induction var",
"loop induction variable could not be identified",
- "NoInductionVariable");
+ "NoInductionVariable", ORE, TheLoop);
return false;
} else if (!WidestIndTy) {
reportVectorizationFailure("Did not find one integer induction var",
"integer loop induction variable could not be identified",
- "NoIntegerInductionVariable");
+ "NoIntegerInductionVariable", ORE, TheLoop);
return false;
} else {
LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
reportVectorizationFailure("Stores to a uniform address",
"write to a loop invariant address could not be vectorized",
- "CantVectorizeStoreToLoopInvariantAddress");
+ "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
return false;
}
Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
if (!EnableIfConversion) {
reportVectorizationFailure("If-conversion is disabled",
"if-conversion is disabled",
- "IfConversionDisabled");
+ "IfConversionDisabled",
+ ORE, TheLoop);
return false;
}
if (!isa<BranchInst>(BB->getTerminator())) {
reportVectorizationFailure("Loop contains a switch statement",
"loop contains a switch statement",
- "LoopContainsSwitch", BB->getTerminator());
+ "LoopContainsSwitch", ORE, TheLoop,
+ BB->getTerminator());
return false;
}
reportVectorizationFailure(
"Control flow cannot be substituted for a select",
"control flow cannot be substituted for a select",
- "NoCFGForSelect", BB->getTerminator());
+ "NoCFGForSelect", ORE, TheLoop,
+ BB->getTerminator());
return false;
}
} else if (BB != Header && !canIfConvertPHINodes(BB)) {
reportVectorizationFailure(
"Control flow cannot be substituted for a select",
"control flow cannot be substituted for a select",
- "NoCFGForSelect", BB->getTerminator());
+ "NoCFGForSelect", ORE, TheLoop,
+ BB->getTerminator());
return false;
}
}
if (!Lp->getLoopPreheader()) {
reportVectorizationFailure("Loop doesn't have a legal pre-header",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood");
+ "CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
if (Lp->getNumBackEdges() != 1) {
reportVectorizationFailure("The loop must have a single backedge",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood");
+ "CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
if (!Lp->getExitingBlock()) {
reportVectorizationFailure("The loop must have an exiting block",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood");
+ "CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
reportVectorizationFailure("The exiting block is not the loop latch",
"loop control flow is not understood by vectorizer",
- "CFGNotUnderstood");
+ "CFGNotUnderstood", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
if (!canVectorizeOuterLoop()) {
reportVectorizationFailure("Unsupported outer loop",
"unsupported outer loop",
- "UnsupportedOuterLoop");
+ "UnsupportedOuterLoop",
+ ORE, TheLoop);
// TODO: Implement DoExtraAnalysis when subsequent legal checks support
// outer loops.
return false;
if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
reportVectorizationFailure("Too many SCEV checks needed",
"Too many SCEV assumptions need to be made and checked at runtime",
- "TooManySCEVRunTimeChecks");
+ "TooManySCEVRunTimeChecks", ORE, TheLoop);
if (DoExtraAnalysis)
Result = false;
else
"No primary induction, cannot fold tail by masking",
"Missing a primary induction variable in the loop, which is "
"needed in order to fold tail by masking as required.",
- "NoPrimaryInduction");
+ "NoPrimaryInduction", ORE, TheLoop);
return false;
}
reportVectorizationFailure(
"Loop has reductions, cannot fold tail by masking",
"Cannot fold tail by masking in the presence of reductions.",
- "ReductionFoldingTailByMasking");
+ "ReductionFoldingTailByMasking", ORE, TheLoop);
return false;
}
reportVectorizationFailure(
"Cannot fold tail by masking, loop has an outside user for",
"Cannot fold tail by masking in the presence of live outs.",
- "LiveOutFoldingTailByMasking", UI);
+ "LiveOutFoldingTailByMasking", ORE, TheLoop, UI);
return false;
}
}
reportVectorizationFailure(
"Cannot fold tail by masking as required",
"control flow cannot be substituted for a select",
- "NoCFGForSelect", BB->getTerminator());
+ "NoCFGForSelect", ORE, TheLoop,
+ BB->getTerminator());
return false;
}
}
B.SetCurrentDebugLocation(DebugLoc());
}
+#ifndef NDEBUG
+/// Write a record \p DebugMsg about vectorization failure to the debug
+/// output stream. If \p I is passed, it is an instruction that prevents
+/// vectorization and it is printed after the message. Otherwise the record
+/// is terminated with a period, unless \p DebugMsg already ends with one.
+static void debugVectorizationFailure(const StringRef DebugMsg,
+                                      Instruction *I) {
+  dbgs() << "LV: Not vectorizing: " << DebugMsg;
+  // Some callers pass a message that already ends in '.'; appending another
+  // one would terminate the record with "..".
+  const bool NeedsPeriod = DebugMsg.empty() || DebugMsg.back() != '.';
+  if (I != nullptr)
+    dbgs() << " " << *I;
+  else if (NeedsPeriod)
+    dbgs() << '.';
+  dbgs() << '\n';
+}
+#endif
+
+/// Build the analysis remark that explains why vectorization failed.
+///
+/// \p PassName is the name of the pass (e.g. can be AlwaysPrint) and
+/// \p RemarkName is the identifier for the remark. If \p I is passed it is an
+/// instruction that prevents vectorization: its parent block becomes the code
+/// region and its debug location, when it has one, is used. Otherwise the
+/// header and the start location of \p TheLoop are used. \return the remark
+/// object, already prefixed with "loop not vectorized: ", that can be
+/// streamed to.
+static OptimizationRemarkAnalysis createLVAnalysis(const char *PassName,
+    StringRef RemarkName, Loop *TheLoop, Instruction *I) {
+  // Without an offending instruction the remark points at the loop itself.
+  Value *Region = I ? I->getParent() : TheLoop->getHeader();
+
+  // Start from the loop's location; an instruction that carries its own debug
+  // location overrides it.
+  DebugLoc Loc = TheLoop->getStartLoc();
+  if (I && I->getDebugLoc())
+    Loc = I->getDebugLoc();
+
+  OptimizationRemarkAnalysis Remark(PassName, RemarkName, Loc, Region);
+  Remark << "loop not vectorized: ";
+  return Remark;
+}
+
+namespace llvm {
+
+/// Report a vectorization failure for \p TheLoop.
+///
+/// \p DebugMsg is handed to debugVectorizationFailure through LLVM_DEBUG, and
+/// \p OREMsg is emitted through \p ORE as an analysis remark identified by
+/// \p ORETag. If \p I is passed, it is an instruction that prevents
+/// vectorization. \p ORE is dereferenced unconditionally, so it must not be
+/// null.
+void reportVectorizationFailure(const StringRef DebugMsg,
+    const StringRef OREMsg, const StringRef ORETag,
+    OptimizationRemarkEmitter *ORE, Loop *TheLoop, Instruction *I) {
+  LLVM_DEBUG(debugVectorizationFailure(DebugMsg, I));
+
+  // The hints object is only used to look up the pass name for the remark, so
+  // the value of its boolean constructor argument is irrelevant here.
+  LoopVectorizeHints Hints(TheLoop, true /* doesn't matter */, *ORE);
+  const char *PassName = Hints.vectorizeAnalysisPassName();
+
+  ORE->emit(createLVAnalysis(PassName, ORETag, TheLoop, I) << OREMsg);
+}
+
+} // end namespace llvm
+
#ifndef NDEBUG
/// \return string containing a file name and a line # for the given loop.
static std::string getDebugLocString(const Loop *L) {
/// should be used.
bool useEmulatedMaskMemRefHack(Instruction *I);
- /// Create an analysis remark that explains why vectorization failed
- ///
- /// \p RemarkName is the identifier for the remark. \return the remark object
- /// that can be streamed to.
- OptimizationRemarkAnalysis createMissedAnalysis(StringRef RemarkName) {
- return createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(),
- RemarkName, TheLoop);
- }
-
/// Map of scalar integer values to the smallest bitwidth they can be legally
/// represented as. The vector equivalents of these values should be truncated
/// to this type.
LLVM_DEBUG(dbgs() << "LV: Performing code size checks.\n");
if (Legal->getRuntimePointerChecking()->Need) {
- ORE->emit(createMissedAnalysis("CantVersionLoopWithOptForSize")
- << "runtime pointer checks needed. Enable vectorization of this "
- "loop with '#pragma clang loop vectorize(enable)' when "
- "compiling with -Os/-Oz");
- LLVM_DEBUG(
- dbgs()
- << "LV: Aborting. Runtime ptr check is required with -Os/-Oz.\n");
+ reportVectorizationFailure("Runtime ptr check is required with -Os/-Oz",
+ "runtime pointer checks needed. Enable vectorization of this "
+ "loop with '#pragma clang loop vectorize(enable)' when "
+ "compiling with -Os/-Oz",
+ "CantVersionLoopWithOptForSize", ORE, TheLoop);
return true;
}
if (!PSE.getUnionPredicate().getPredicates().empty()) {
- ORE->emit(createMissedAnalysis("CantVersionLoopWithOptForSize")
- << "runtime SCEV checks needed. Enable vectorization of this "
- "loop with '#pragma clang loop vectorize(enable)' when "
- "compiling with -Os/-Oz");
- LLVM_DEBUG(
- dbgs()
- << "LV: Aborting. Runtime SCEV check is required with -Os/-Oz.\n");
+ reportVectorizationFailure("Runtime SCEV check is required with -Os/-Oz",
+ "runtime SCEV checks needed. Enable vectorization of this "
+ "loop with '#pragma clang loop vectorize(enable)' when "
+ "compiling with -Os/-Oz",
+ "CantVersionLoopWithOptForSize", ORE, TheLoop);
return true;
}
// FIXME: Avoid specializing for stride==1 instead of bailing out.
if (!Legal->getLAI()->getSymbolicStrides().empty()) {
- ORE->emit(createMissedAnalysis("CantVersionLoopWithOptForSize")
- << "runtime stride == 1 checks needed. Enable vectorization of "
- "this loop with '#pragma clang loop vectorize(enable)' when "
- "compiling with -Os/-Oz");
- LLVM_DEBUG(
- dbgs()
- << "LV: Aborting. Runtime stride check is required with -Os/-Oz.\n");
+ reportVectorizationFailure("Runtime stride check is required with -Os/-Oz",
+ "runtime stride == 1 checks needed. Enable vectorization of "
+ "this loop with '#pragma clang loop vectorize(enable)' when "
+ "compiling with -Os/-Oz",
+ "CantVersionLoopWithOptForSize", ORE, TheLoop);
return true;
}
if (Legal->getRuntimePointerChecking()->Need && TTI.hasBranchDivergence()) {
// TODO: It may by useful to do since it's still likely to be dynamically
// uniform if the target can skip.
- LLVM_DEBUG(
- dbgs() << "LV: Not inserting runtime ptr check for divergent target");
-
- ORE->emit(
- createMissedAnalysis("CantVersionLoopWithDivergentTarget")
- << "runtime pointer checks needed. Not enabled for divergent target");
-
+ reportVectorizationFailure(
+ "Not inserting runtime ptr check for divergent target",
+ "runtime pointer checks needed. Not enabled for divergent target",
+ "CantVersionLoopWithDivergentTarget", ORE, TheLoop);
return None;
}
unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
LLVM_DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
if (TC == 1) {
- ORE->emit(createMissedAnalysis("SingleIterationLoop")
- << "loop trip count is one, irrelevant for vectorization");
- LLVM_DEBUG(dbgs() << "LV: Aborting, single iteration (non) loop.\n");
+ reportVectorizationFailure("Single iteration (non) loop",
+ "loop trip count is one, irrelevant for vectorization",
+ "SingleIterationLoop", ORE, TheLoop);
return None;
}
}
if (TC == 0) {
- ORE->emit(
- createMissedAnalysis("UnknownLoopCountComplexCFG")
- << "unable to calculate the loop count due to complex control flow");
+ reportVectorizationFailure(
+ "Unable to calculate the loop count due to complex control flow",
+ "unable to calculate the loop count due to complex control flow",
+ "UnknownLoopCountComplexCFG", ORE, TheLoop);
return None;
}
- ORE->emit(createMissedAnalysis("NoTailLoopWithOptForSize")
- << "cannot optimize for size and vectorize at the same time. "
- "Enable vectorization of this loop with '#pragma clang loop "
- "vectorize(enable)' when compiling with -Os/-Oz");
+ reportVectorizationFailure(
+ "Cannot optimize for size and vectorize at the same time.",
+ "cannot optimize for size and vectorize at the same time. "
+ "Enable vectorization of this loop with '#pragma clang loop "
+ "vectorize(enable)' when compiling with -Os/-Oz",
+ "NoTailLoopWithOptForSize", ORE, TheLoop);
return None;
}
}
if (!EnableCondStoresVectorization && NumPredStores) {
- ORE->emit(createMissedAnalysis("ConditionalStore")
- << "store that is conditionally executed prevents vectorization");
- LLVM_DEBUG(
- dbgs() << "LV: No vectorization. There are conditional stores.\n");
+ reportVectorizationFailure("There are conditional stores.",
+ "store that is conditionally executed prevents vectorization",
+ "ConditionalStore", ORE, TheLoop);
Width = 1;
Cost = ScalarCost;
}
// an integer loop and the vector instructions selected are purely integer
// vector instructions?
if (F->hasFnAttribute(Attribute::NoImplicitFloat)) {
- LLVM_DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat"
- "attribute is used.\n");
- ORE->emit(createLVMissedAnalysis(Hints.vectorizeAnalysisPassName(),
- "NoImplicitFloat", L)
- << "loop not vectorized due to NoImplicitFloat attribute");
+ reportVectorizationFailure(
+ "Can't vectorize when the NoImplicitFloat attribute is used",
+ "loop not vectorized due to NoImplicitFloat attribute",
+ "NoImplicitFloat", ORE, L);
Hints.emitRemarkWithHints();
return false;
}
// additional fp-math flags can help.
if (Hints.isPotentiallyUnsafe() &&
TTI->isFPVectorizationPotentiallyUnsafe()) {
- LLVM_DEBUG(
- dbgs() << "LV: Potentially unsafe FP op prevents vectorization.\n");
- ORE->emit(
- createLVMissedAnalysis(Hints.vectorizeAnalysisPassName(), "UnsafeFP", L)
- << "loop not vectorized due to unsafe FP support.");
+ reportVectorizationFailure(
+ "Potentially unsafe FP op prevents vectorization",
+ "loop not vectorized due to unsafe FP support.",
+ "UnsafeFP", ORE, L);
Hints.emitRemarkWithHints();
return false;
}