/// Bottom Up SLP Vectorizer.
class BoUpSLP {
+ struct TreeEntry;
+
public:
using ValueList = SmallVector<Value *, 8>;
using InstrList = SmallVector<Instruction *, 16>;
/// (ii) the index of the edge.
struct EdgeInfo {
EdgeInfo() = default;
- /// The index of the user TreeEntry in VectorizableTree.
- int Idx = -1;
+ EdgeInfo(TreeEntry *UserTE, unsigned EdgeIdx)
+ : UserTE(UserTE), EdgeIdx(EdgeIdx) {}
+ /// The user TreeEntry, or nullptr when this edge has no user.
+ TreeEntry *UserTE = nullptr;
/// The operand index of the use.
unsigned EdgeIdx = UINT_MAX;
#ifndef NDEBUG
}
/// Debug print.
void dump(raw_ostream &OS) const {
- OS << "{User:" << Idx << " EdgeIdx:" << EdgeIdx << "}";
+ OS << "{User:" << (UserTE ? std::to_string(UserTE->Idx) : "null")
+ << " EdgeIdx:" << EdgeIdx << "}";
}
LLVM_DUMP_METHOD void dump() const { dump(dbgs()); }
#endif
};
private:
- struct TreeEntry;
-
/// Checks if all users of \p I are the part of the vectorization tree.
bool areAllUsersVectorized(Instruction *I) const;
int getEntryCost(TreeEntry *E);
/// This is the recursive part of buildTree.
- void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth, EdgeInfo EI);
+ void buildTree_rec(ArrayRef<Value *> Roots, unsigned Depth,
+ const EdgeInfo &EI);
/// \returns true if the ExtractElement/ExtractValue instructions in \p VL can
/// be vectorized to use the original vector (or aggregate "bitcast" to a
/// have multiple users so the data structure is not truly a tree.
SmallVector<EdgeInfo, 1> UserTreeIndices;
+ /// The index of this TreeEntry in VectorizableTree (-1 until assigned).
+ int Idx = -1;
+
private:
/// The operands of each instruction in each lane Operands[op_index][lane].
/// Note: This helps avoid the replication of the code that performs the
/// Forwards \p OpVL and \p ReuseShuffleIndices to setOperand() on the user
/// TreeEntry referenced by \p UserTreeIdx, at operand position
/// UserTreeIdx.EdgeIdx. Does nothing when the edge has no user entry.
void trySetUserTEOperand(const EdgeInfo &UserTreeIdx,
ArrayRef<Value *> OpVL,
ArrayRef<unsigned> ReuseShuffleIndices) {
- if (UserTreeIdx.Idx >= 0) {
- auto &VectorizableTree = Container;
- VectorizableTree[UserTreeIdx.Idx]->setOperand(UserTreeIdx.EdgeIdx, OpVL,
- ReuseShuffleIndices);
- }
+ if (UserTreeIdx.UserTE)
+ UserTreeIdx.UserTE->setOperand(UserTreeIdx.EdgeIdx, OpVL,
+ ReuseShuffleIndices);
}
/// \returns the \p OpIdx operand of this TreeEntry.
#ifndef NDEBUG
/// Debug printer.
LLVM_DUMP_METHOD void dump() const {
+ dbgs() << Idx << ".\n";
for (unsigned OpI = 0, OpE = Operands.size(); OpI != OpE; ++OpI) {
dbgs() << "Operand " << OpI << ":\n";
for (const Value *V : Operands[OpI])
/// Create a new VectorizableTree entry.
/// The entry is appended to VectorizableTree, its Scalars are filled from
/// \p VL, and it is marked as a gather when \p Vectorized is false. If
/// \p UserTreeIdx refers to a user entry, the edge is recorded on the new
/// entry. \returns the newly created entry.
/// NOTE(review): part of this function's body is elided in this view (the
/// ReuseShuffleIndices.append call is cut off), so this summary covers only
/// the visible statements.
TreeEntry *newTreeEntry(ArrayRef<Value *> VL, bool Vectorized,
- EdgeInfo &UserTreeIdx,
+ const EdgeInfo &UserTreeIdx,
ArrayRef<unsigned> ReuseShuffleIndices = None,
ArrayRef<unsigned> ReorderIndices = None) {
VectorizableTree.push_back(llvm::make_unique<TreeEntry>(VectorizableTree));
TreeEntry *Last = VectorizableTree.back().get();
// The new entry is the last element, so its index is size() - 1.
- int idx = VectorizableTree.size() - 1;
+ Last->Idx = VectorizableTree.size() - 1;
Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end());
Last->NeedToGather = !Vectorized;
Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
if (Vectorized) {
// Map each scalar to this entry's index; the assert rejects a scalar that
// is already part of the tree.
for (int i = 0, e = VL.size(); i != e; ++i) {
assert(!getTreeEntry(VL[i]) && "Scalar already in tree!");
- ScalarToTreeEntry[VL[i]] = idx;
+ ScalarToTreeEntry[VL[i]] = Last->Idx;
}
} else {
// Non-vectorized scalars are recorded in MustGather instead.
MustGather.insert(VL.begin(), VL.end());
}
// Remember the incoming edge only when it actually has a user entry.
- if (UserTreeIdx.Idx >= 0)
+ if (UserTreeIdx.UserTE)
Last->UserTreeIndices.push_back(UserTreeIdx);
Last->trySetUserTEOperand(UserTreeIdx, VL, ReuseShuffleIndices);
-
- UserTreeIdx.Idx = idx;
return Last;
}
/// Debug printer.
LLVM_DUMP_METHOD void dumpVectorizableTree() const {
for (unsigned Id = 0, IdE = VectorizableTree.size(); Id != IdE; ++Id) {
- dbgs() << Id << ".\n";
VectorizableTree[Id]->dump();
dbgs() << "\n";
}
ContainerTy &VT)
: ChildIteratorType::iterator_adaptor_base(W), VectorizableTree(VT) {}
- NodeRef operator*() { return VectorizableTree[I->Idx].get(); }
+ NodeRef operator*() { return I->UserTE; }
};
static NodeRef getEntryNode(BoUpSLP &R) {
}
void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
- EdgeInfo UserTreeIdx) {
+ const EdgeInfo &UserTreeIdx) {
assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
InstructionsState S = getSameOpcode(VL);
}
}
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
Operands.push_back(cast<PHINode>(j)->getIncomingValueForBlock(
PH->getIncomingBlock(i)));
- UserTreeIdx.EdgeIdx = i;
- buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+ buildTree_rec(Operands, Depth + 1, {TE, i});
}
return;
}
return;
}
}
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- UserTreeIdx.EdgeIdx = i;
- buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+ buildTree_rec(Operands, Depth + 1, {TE, i});
}
return;
}
}
}
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
ValueList Left, Right;
}
}
- UserTreeIdx.EdgeIdx = 0;
- buildTree_rec(Left, Depth + 1, UserTreeIdx);
- UserTreeIdx.EdgeIdx = 1;
- buildTree_rec(Right, Depth + 1, UserTreeIdx);
+ buildTree_rec(Left, Depth + 1, {TE, 0});
+ buildTree_rec(Right, Depth + 1, {TE, 1});
return;
}
case Instruction::Select:
case Instruction::AShr:
case Instruction::And:
case Instruction::Or:
- case Instruction::Xor:
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ case Instruction::Xor: {
+ auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of bin op.\n");
// Sort operands of the instructions so that each side is more likely to
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
ValueList Left, Right;
reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
- UserTreeIdx.EdgeIdx = 0;
- buildTree_rec(Left, Depth + 1, UserTreeIdx);
- UserTreeIdx.EdgeIdx = 1;
- buildTree_rec(Right, Depth + 1, UserTreeIdx);
+ buildTree_rec(Left, Depth + 1, {TE, 0});
+ buildTree_rec(Right, Depth + 1, {TE, 1});
return;
}
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- UserTreeIdx.EdgeIdx = i;
- buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+ buildTree_rec(Operands, Depth + 1, {TE, i});
}
return;
-
+ }
case Instruction::GetElementPtr: {
// We don't combine GEPs with complicated (nested) indexing.
for (unsigned j = 0; j < VL.size(); ++j) {
}
}
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
for (unsigned i = 0, e = 2; i < e; ++i) {
ValueList Operands;
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- UserTreeIdx.EdgeIdx = i;
- buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+ buildTree_rec(Operands, Depth + 1, {TE, i});
}
return;
}
return;
}
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
ValueList Operands;
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(0));
- UserTreeIdx.EdgeIdx = 0;
- buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+ buildTree_rec(Operands, Depth + 1, {TE, 0});
return;
}
case Instruction::Call: {
}
}
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
ValueList Operands;
// Prepare the operand vector.
CallInst *CI2 = dyn_cast<CallInst>(j);
Operands.push_back(CI2->getArgOperand(i));
}
- UserTreeIdx.EdgeIdx = i;
- buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+ buildTree_rec(Operands, Depth + 1, {TE, i});
}
return;
}
- case Instruction::ShuffleVector:
+ case Instruction::ShuffleVector: {
// If this is not an alternate sequence of opcode like add-sub
// then do not vectorize this instruction.
if (!S.isAltShuffle()) {
LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
return;
}
- newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
// Reorder operands if reordering would enable vectorization.
if (isa<BinaryOperator>(VL0)) {
ValueList Left, Right;
reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
- UserTreeIdx.EdgeIdx = 0;
- buildTree_rec(Left, Depth + 1, UserTreeIdx);
- UserTreeIdx.EdgeIdx = 1;
- buildTree_rec(Right, Depth + 1, UserTreeIdx);
+ buildTree_rec(Left, Depth + 1, {TE, 0});
+ buildTree_rec(Right, Depth + 1, {TE, 1});
return;
}
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(i));
- UserTreeIdx.EdgeIdx = i;
- buildTree_rec(Operands, Depth + 1, UserTreeIdx);
+ buildTree_rec(Operands, Depth + 1, {TE, i});
}
return;
-
+ }
default:
BS.cancelScheduling(VL, VL0);
newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);