#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Vectorize.h"
#include <algorithm>
"slp-min-tree-size", cl::init(3), cl::Hidden,
cl::desc("Only vectorize small trees if they are fully vectorizable"));
+static cl::opt<bool>
+ ViewSLPTree("view-slp-tree", cl::Hidden,
+ cl::desc("Display the SLP trees with Graphviz"));
+
// Limit the number of alias checks. The limit is chosen so that
// it has no negative effect on the llvm benchmarks.
static const unsigned AliasedCheckLimit = 10;
int getEntryCost(TreeEntry *E);
/// This is the recursive part of buildTree.
- void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth);
+ void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth, int);
/// \returns True if the ExtractElement/ExtractValue instructions in VL can
/// be vectorized to use the original vector (or aggregate "bitcast" to a vector).
SmallVectorImpl<Value *> &Left,
SmallVectorImpl<Value *> &Right);
struct TreeEntry {
- TreeEntry() : Scalars(), VectorizedValue(nullptr),
- NeedToGather(0), NeedToShuffle(0) {}
+ TreeEntry(std::vector<TreeEntry> &Container)
+ : Scalars(), VectorizedValue(nullptr), NeedToGather(0),
+ NeedToShuffle(0), Container(Container) {}
/// \returns true if the scalars in VL are equal to this entry.
bool isSame(ArrayRef<Value *> VL) const {
/// Do we need to shuffle the load ?
bool NeedToShuffle;
+
+ /// Points back to the VectorizableTree.
+ ///
+ /// Only used for Graphviz right now. Unfortunately GraphTrait::NodeRef has
+ /// to be a pointer and needs to be able to initialize the child iterator.
+ /// Thus we need a reference back to the container to translate the indices
+ /// to entries.
+ std::vector<TreeEntry> &Container;
+
+ /// The TreeEntry index containing the user of this entry. We can actually
+ /// have multiple users so the data structure is not truly a tree.
+ SmallVector<int, 1> UserTreeIndices;
};
/// Create a new VectorizableTree entry.
TreeEntry *newTreeEntry(ArrayRef<Value *> VL, bool Vectorized,
- bool NeedToShuffle) {
- VectorizableTree.emplace_back();
+ bool NeedToShuffle, int &UserTreeIdx) {
+ VectorizableTree.emplace_back(VectorizableTree);
int idx = VectorizableTree.size() - 1;
TreeEntry *Last = &VectorizableTree[idx];
Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end());
} else {
MustGather.insert(VL.begin(), VL.end());
}
+
+ if (UserTreeIdx >= 0)
+ Last->UserTreeIndices.push_back(UserTreeIdx);
+ UserTreeIdx = idx;
return Last;
}
return os;
}
#endif
+ friend struct GraphTraits<BoUpSLP *>;
+ friend struct DOTGraphTraits<BoUpSLP *>;
/// Contains all scheduling data for a basic block.
///
/// original width.
MapVector<Value *, std::pair<uint64_t, bool>> MinBWs;
};
+} // end namespace slpvectorizer
+
+template <> struct GraphTraits<BoUpSLP *> {
+ typedef BoUpSLP::TreeEntry TreeEntry;
+
+ /// NodeRef has to be a pointer per the GraphWriter.
+ typedef TreeEntry *NodeRef;
+
+ /// \brief Add the VectorizableTree to the index iterator to be able to return
+ /// TreeEntry pointers.
+ struct ChildIteratorType
+ : public iterator_adaptor_base<ChildIteratorType,
+ SmallVector<int, 1>::iterator> {
+
+ std::vector<TreeEntry> &VectorizableTree;
+
+ ChildIteratorType(SmallVector<int, 1>::iterator W,
+ std::vector<TreeEntry> &VT)
+ : ChildIteratorType::iterator_adaptor_base(W), VectorizableTree(VT) {}
+
+ NodeRef operator*() { return &VectorizableTree[*I]; }
+ };
+
+ static NodeRef getEntryNode(BoUpSLP &R) { return &R.VectorizableTree[0]; }
+
+ static ChildIteratorType child_begin(NodeRef N) {
+ return {N->UserTreeIndices.begin(), N->Container};
+ }
+ static ChildIteratorType child_end(NodeRef N) {
+ return {N->UserTreeIndices.end(), N->Container};
+ }
+
+ /// For the node iterator we just need to turn the TreeEntry iterator into a
+ /// TreeEntry* iterator so that it dereferences to NodeRef.
+ typedef pointer_iterator<std::vector<TreeEntry>::iterator> nodes_iterator;
+
+ static nodes_iterator nodes_begin(BoUpSLP *R) {
+ return nodes_iterator(R->VectorizableTree.begin());
+ }
+ static nodes_iterator nodes_end(BoUpSLP *R) {
+ return nodes_iterator(R->VectorizableTree.end());
+ }
+
+ static unsigned size(BoUpSLP *R) { return R->VectorizableTree.size(); }
+};
+
+template <> struct DOTGraphTraits<BoUpSLP *> : public DefaultDOTGraphTraits {
+ typedef BoUpSLP::TreeEntry TreeEntry;
+
+ DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
+
+ std::string getNodeLabel(const TreeEntry *Entry, const BoUpSLP *) {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ if (isSplat(Entry->Scalars)) {
+ OS << "<splat> " << *Entry->Scalars[0];
+ return Str;
+ }
+ for (auto V : Entry->Scalars)
+ OS << *V << "\n";
+ return Str;
+ }
+
+ static std::string getNodeAttributes(const TreeEntry *Entry,
+ const BoUpSLP *) {
+ if (Entry->NeedToGather)
+ return "color=red";
+ return "";
+ }
+};
} // end namespace llvm
-} // end namespace slpvectorizer
void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
ArrayRef<Value *> UserIgnoreLst) {
UserIgnoreList = UserIgnoreLst;
if (!allSameType(Roots))
return;
- buildTree_rec(Roots, 0);
+ buildTree_rec(Roots, 0, -1);
// Collect the values that we need to extract from the tree.
for (TreeEntry &EIdx : VectorizableTree) {
}
}
-
-void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
+void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
+ int UserTreeIdx) {
bool isAltShuffle = false;
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
if (Depth == RecursionMaxDepth) {
DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
// Don't handle vectors.
if (VL[0]->getType()->isVectorTy()) {
DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
if (SI->getValueOperand()->getType()->isVectorTy()) {
DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
unsigned Opcode = getSameOpcode(VL);
// If all of the operands are identical or constant we have a simple solution.
if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !Opcode) {
DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
if (EphValues.count(VL[i])) {
DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] <<
") is ephemeral.\n");
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
}
DEBUG(dbgs() << "SLP: \tChecking bundle: " << *VL[i] << ".\n");
if (E->Scalars[i] != VL[i]) {
DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
}
+ // Record the reuse of the tree node. FIXME, currently this is only used to
+ // properly draw the graph rather than for the actual vectorization.
+ E->UserTreeIndices.push_back(UserTreeIdx);
DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *VL[0] << ".\n");
return;
}
if (ScalarToTreeEntry.count(VL[i])) {
DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] <<
") is already in tree.\n");
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
}
for (unsigned i = 0, e = VL.size(); i != e; ++i) {
if (MustGather.count(VL[i])) {
DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
}
// Don't go into unreachable blocks. They may contain instructions with
// dependency cycles which confuse the final scheduling.
DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
for (unsigned j = i+1; j < e; ++j)
if (VL[i] == VL[j]) {
DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
assert((!BS.getScheduleData(VL[0]) ||
!BS.getScheduleData(VL[0])->isPartOfBundle()) &&
"tryScheduleBundle should cancelScheduling on failure");
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
if (Term) {
DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n");
BS.cancelScheduling(VL);
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
}
- newTreeEntry(VL, true, false);
+ newTreeEntry(VL, true, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
Operands.push_back(cast<PHINode>(j)->getIncomingValueForBlock(
PH->getIncomingBlock(i)));
- buildTree_rec(Operands, Depth + 1);
+ buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
}
} else {
BS.cancelScheduling(VL);
}
- newTreeEntry(VL, Reuse, false);
+ newTreeEntry(VL, Reuse, false, UserTreeIdx);
return;
}
case Instruction::Load: {
if (DL->getTypeSizeInBits(ScalarTy) !=
DL->getTypeAllocSizeInBits(ScalarTy)) {
BS.cancelScheduling(VL);
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
return;
}
LoadInst *L = cast<LoadInst>(VL[i]);
if (!L->isSimple()) {
BS.cancelScheduling(VL);
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
return;
}
if (Consecutive) {
++NumLoadsWantToKeepOrder;
- newTreeEntry(VL, true, false);
+ newTreeEntry(VL, true, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of loads.\n");
return;
}
}
}
if (ShuffledLoads) {
- newTreeEntry(NewVL, true, true);
+ newTreeEntry(NewVL, true, true, UserTreeIdx);
return;
}
}
}
BS.cancelScheduling(VL);
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
if (ReverseConsecutive) {
++NumLoadsWantToChangeOrder;
Type *Ty = cast<Instruction>(Val)->getOperand(0)->getType();
if (Ty != SrcTy || !isValidElementType(Ty)) {
BS.cancelScheduling(VL);
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n");
return;
}
}
- newTreeEntry(VL, true, false);
+ newTreeEntry(VL, true, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of casts.\n");
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- buildTree_rec(Operands, Depth+1);
+ buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
}
if (Cmp->getPredicate() != P0 ||
Cmp->getOperand(0)->getType() != ComparedTy) {
BS.cancelScheduling(VL);
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n");
return;
}
}
- newTreeEntry(VL, true, false);
+ newTreeEntry(VL, true, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of compares.\n");
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- buildTree_rec(Operands, Depth+1);
+ buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
}
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- newTreeEntry(VL, true, false);
+ newTreeEntry(VL, true, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of bin op.\n");
// Sort operands of the instructions so that each side is more likely to
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
ValueList Left, Right;
reorderInputsAccordingToOpcode(VL, Left, Right);
- buildTree_rec(Left, Depth + 1);
- buildTree_rec(Right, Depth + 1);
+ buildTree_rec(Left, Depth + 1, UserTreeIdx);
+ buildTree_rec(Right, Depth + 1, UserTreeIdx);
return;
}
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- buildTree_rec(Operands, Depth+1);
+ buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
}
if (cast<Instruction>(Val)->getNumOperands() != 2) {
DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
BS.cancelScheduling(VL);
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
}
if (Ty0 != CurTy) {
DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n");
BS.cancelScheduling(VL);
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
}
DEBUG(
dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n");
BS.cancelScheduling(VL);
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
return;
}
}
- newTreeEntry(VL, true, false);
+ newTreeEntry(VL, true, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
for (unsigned i = 0, e = 2; i < e; ++i) {
ValueList Operands;
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- buildTree_rec(Operands, Depth + 1);
+ buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
}
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) {
BS.cancelScheduling(VL);
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
return;
}
- newTreeEntry(VL, true, false);
+ newTreeEntry(VL, true, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a vector of stores.\n");
ValueList Operands;
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(0));
- buildTree_rec(Operands, Depth + 1);
+ buildTree_rec(Operands, Depth + 1, UserTreeIdx);
return;
}
case Instruction::Call: {
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
if (!isTriviallyVectorizable(ID)) {
BS.cancelScheduling(VL);
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
return;
}
getVectorIntrinsicIDForCall(CI2, TLI) != ID ||
!CI->hasIdenticalOperandBundleSchema(*CI2)) {
BS.cancelScheduling(VL);
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i]
<< "\n");
return;
Value *A1J = CI2->getArgOperand(1);
if (A1I != A1J) {
BS.cancelScheduling(VL);
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
<< " argument "<< A1I<<"!=" << A1J
<< "\n");
CI->op_begin() + CI->getBundleOperandsEndIndex(),
CI2->op_begin() + CI2->getBundleOperandsStartIndex())) {
BS.cancelScheduling(VL);
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:" << *CI << "!="
<< *VL[i] << '\n');
return;
}
}
- newTreeEntry(VL, true, false);
+ newTreeEntry(VL, true, false, UserTreeIdx);
for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
ValueList Operands;
// Prepare the operand vector.
CallInst *CI2 = dyn_cast<CallInst>(j);
Operands.push_back(CI2->getArgOperand(i));
}
- buildTree_rec(Operands, Depth + 1);
+ buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
}
// then do not vectorize this instruction.
if (!isAltShuffle) {
BS.cancelScheduling(VL);
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
return;
}
- newTreeEntry(VL, true, false);
+ newTreeEntry(VL, true, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
// Reorder operands if reordering would enable vectorization.
if (isa<BinaryOperator>(VL0)) {
ValueList Left, Right;
reorderAltShuffleOperands(VL, Left, Right);
- buildTree_rec(Left, Depth + 1);
- buildTree_rec(Right, Depth + 1);
+ buildTree_rec(Left, Depth + 1, UserTreeIdx);
+ buildTree_rec(Right, Depth + 1, UserTreeIdx);
return;
}
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- buildTree_rec(Operands, Depth + 1);
+ buildTree_rec(Operands, Depth + 1, UserTreeIdx);
}
return;
}
default:
BS.cancelScheduling(VL);
- newTreeEntry(VL, false, false);
+ newTreeEntry(VL, false, false, UserTreeIdx);
DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
return;
}
int SpillCost = getSpillCost();
Cost += SpillCost + ExtractCost;
- DEBUG(dbgs() << "SLP: Spill Cost = " << SpillCost << ".\n"
- << "SLP: Extract Cost = " << ExtractCost << ".\n"
- << "SLP: Total Cost = " << Cost << ".\n");
+ std::string Str;
+ {
+ raw_string_ostream OS(Str);
+ OS << "SLP: Spill Cost = " << SpillCost << ".\n"
+ << "SLP: Extract Cost = " << ExtractCost << ".\n"
+ << "SLP: Total Cost = " << Cost << ".\n";
+ }
+ DEBUG(dbgs() << Str);
+
+ if (ViewSLPTree)
+ ViewGraph(this, "SLP" + F->getName(), false, Str);
+
return Cost;
}