/// Bottom Up SLP Vectorizer.
class BoUpSLP {
struct TreeEntry;
+ struct ScheduleData;
public:
using ValueList = SmallVector<Value *, 8>;
public:
/// Set this bundle's \p OpIdx'th operand to \p OpVL.
- void setOperand(unsigned OpIdx, ArrayRef<Value *> OpVL,
- ArrayRef<unsigned> ReuseShuffleIndices) {
+ void setOperand(unsigned OpIdx, ArrayRef<Value *> OpVL) {
if (Operands.size() < OpIdx + 1)
Operands.resize(OpIdx + 1);
assert(Operands[OpIdx].size() == 0 && "Already resized?");
Operands[OpIdx].resize(Scalars.size());
for (unsigned Lane = 0, E = Scalars.size(); Lane != E; ++Lane)
- Operands[OpIdx][Lane] = (!ReuseShuffleIndices.empty())
- ? OpVL[ReuseShuffleIndices[Lane]]
- : OpVL[Lane];
- }
-
- /// If there is a user TreeEntry, then set its operand.
- void trySetUserTEOperand(const EdgeInfo &UserTreeIdx,
- ArrayRef<Value *> OpVL,
- ArrayRef<unsigned> ReuseShuffleIndices) {
- if (UserTreeIdx.UserTE)
- UserTreeIdx.UserTE->setOperand(UserTreeIdx.EdgeIdx, OpVL,
- ReuseShuffleIndices);
+ Operands[OpIdx][Lane] = OpVL[Lane];
+ }
+
+ /// Set the operands of this bundle in their original order.
+ /// Copies operand \p OpIdx of every scalar in Scalars into
+ /// Operands[OpIdx], lane by lane, with no reordering applied.
+ void setOperandsInOrder() {
+ assert(Operands.empty() && "Already initialized?");
+ auto *I0 = cast<Instruction>(Scalars[0]);
+ Operands.resize(I0->getNumOperands());
+ unsigned NumLanes = Scalars.size();
+ for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
+ OpIdx != NumOperands; ++OpIdx) {
+ Operands[OpIdx].resize(NumLanes);
+ for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+ auto *I = cast<Instruction>(Scalars[Lane]);
+ // Every scalar in the bundle must have the same operand count.
+ assert(I->getNumOperands() == NumOperands &&
+ "Expected same number of operands");
+ Operands[OpIdx][Lane] = I->getOperand(OpIdx);
+ }
+ }
}
/// \returns the \p OpIdx operand of this TreeEntry.
return Operands[OpIdx];
}
+ /// \returns the number of operands.
+ unsigned getNumOperands() const { return Operands.size(); }
+
/// \return the single \p OpIdx operand.
Value *getSingleOperand(unsigned OpIdx) const {
assert(OpIdx < Operands.size() && "Off bounds");
};
/// Create a new VectorizableTree entry.
- TreeEntry *newTreeEntry(ArrayRef<Value *> VL, bool Vectorized,
+ TreeEntry *newTreeEntry(ArrayRef<Value *> VL,
+ Optional<ScheduleData *> Bundle,
const EdgeInfo &UserTreeIdx,
ArrayRef<unsigned> ReuseShuffleIndices = None,
ArrayRef<unsigned> ReorderIndices = None) {
+ bool Vectorized = (bool)Bundle;
VectorizableTree.push_back(std::make_unique<TreeEntry>(VectorizableTree));
TreeEntry *Last = VectorizableTree.back().get();
Last->Idx = VectorizableTree.size() - 1;
assert(!getTreeEntry(VL[i]) && "Scalar already in tree!");
ScalarToTreeEntry[VL[i]] = Last;
}
+ // Update the scheduler bundle to point to this TreeEntry.
+ unsigned Lane = 0;
+ for (ScheduleData *BundleMember = Bundle.getValue(); BundleMember;
+ BundleMember = BundleMember->NextInBundle) {
+ BundleMember->TE = Last;
+ BundleMember->Lane = Lane;
+ ++Lane;
+ }
+ assert((!Bundle.getValue() || Lane == VL.size()) &&
+ "Bundle and VL out of sync");
} else {
MustGather.insert(VL.begin(), VL.end());
}
if (UserTreeIdx.UserTE)
Last->UserTreeIndices.push_back(UserTreeIdx);
- Last->trySetUserTEOperand(UserTreeIdx, VL, ReuseShuffleIndices);
return Last;
}
UnscheduledDepsInBundle = UnscheduledDeps;
clearDependencies();
OpValue = OpVal;
+ TE = nullptr;
+ Lane = -1;
}
/// Returns true if the dependency information has been calculated.
/// Opcode of the current instruction in the schedule data.
Value *OpValue = nullptr;
+
+ /// The TreeEntry that this instruction corresponds to.
+ TreeEntry *TE = nullptr;
+
+ /// The lane of this node in the TreeEntry.
+ int Lane = -1;
};
#ifndef NDEBUG
continue;
}
// Handle the def-use chain dependencies.
- for (Use &U : BundleMember->Inst->operands()) {
- auto *I = dyn_cast<Instruction>(U.get());
- if (!I)
- continue;
+
+ // Decrement the unscheduled counter and insert to ready list if ready.
+ auto &&DecrUnsched = [this, &ReadyList](Instruction *I) {
doForAllOpcodes(I, [&ReadyList](ScheduleData *OpDef) {
if (OpDef && OpDef->hasValidDependencies() &&
OpDef->incrementUnscheduledDeps(-1) == 0) {
<< "SLP: gets ready (def): " << *DepBundle << "\n");
}
});
+ };
+
+ // If BundleMember is a vector bundle, its operands may have been
+ // reordered during buildTree(). We therefore need to get its operands
+ // through the TreeEntry.
+ if (TreeEntry *TE = BundleMember->TE) {
+ int Lane = BundleMember->Lane;
+ assert(Lane >= 0 && "Lane not set");
+ for (unsigned OpIdx = 0, NumOperands = TE->getNumOperands();
+ OpIdx != NumOperands; ++OpIdx)
+ if (auto *I = dyn_cast<Instruction>(TE->getOperand(OpIdx)[Lane]))
+ DecrUnsched(I);
+ } else {
+ // If BundleMember is a stand-alone instruction, no operand reordering
+ // has taken place, so we directly access its operands.
+ for (Use &U : BundleMember->Inst->operands())
+ if (auto *I = dyn_cast<Instruction>(U.get()))
+ DecrUnsched(I);
}
// Handle the memory dependencies.
for (ScheduleData *MemoryDepSD : BundleMember->MemoryDependencies) {
/// Checks if a bundle of instructions can be scheduled, i.e. has no
/// cyclic dependencies. This is only a dry-run, no instructions are
/// actually moved at this stage.
- bool tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
- const InstructionsState &S);
+ /// \returns the scheduling bundle. The returned Optional value is non-None
+ /// if \p VL is allowed to be scheduled.
+ Optional<ScheduleData *>
+ tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
+ const InstructionsState &S);
/// Un-bundles a group of instructions.
void cancelScheduling(ArrayRef<Value *> VL, Value *OpValue);
InstructionsState S = getSameOpcode(VL);
if (Depth == RecursionMaxDepth) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
- newTreeEntry(VL, false, UserTreeIdx);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
return;
}
// Don't handle vectors.
if (S.OpValue->getType()->isVectorTy()) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
- newTreeEntry(VL, false, UserTreeIdx);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
return;
}
if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
if (SI->getValueOperand()->getType()->isVectorTy()) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
- newTreeEntry(VL, false, UserTreeIdx);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
return;
}
// If all of the operands are identical or constant we have a simple solution.
if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.getOpcode()) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");
- newTreeEntry(VL, false, UserTreeIdx);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
return;
}
if (EphValues.count(VL[i])) {
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *VL[i]
<< ") is ephemeral.\n");
- newTreeEntry(VL, false, UserTreeIdx);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
return;
}
}
LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");
if (!E->isSame(VL)) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
- newTreeEntry(VL, false, UserTreeIdx);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
return;
}
// Record the reuse of the tree node. FIXME, currently this is only used to
E->UserTreeIndices.push_back(UserTreeIdx);
LLVM_DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue
<< ".\n");
- E->trySetUserTEOperand(UserTreeIdx, VL, None);
return;
}
if (getTreeEntry(I)) {
LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *VL[i]
<< ") is already in tree.\n");
- newTreeEntry(VL, false, UserTreeIdx);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
return;
}
}
for (unsigned i = 0, e = VL.size(); i != e; ++i) {
if (MustGather.count(VL[i]) || is_contained(UserIgnoreList, VL[i])) {
LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
- newTreeEntry(VL, false, UserTreeIdx);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
return;
}
}
// Don't go into unreachable blocks. They may contain instructions with
// dependency cycles which confuse the final scheduling.
LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
- newTreeEntry(VL, false, UserTreeIdx);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
return;
}
LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
if (UniqueValues.size() <= 1 || !llvm::isPowerOf2_32(UniqueValues.size())) {
LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
- newTreeEntry(VL, false, UserTreeIdx);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx);
return;
}
VL = UniqueValues;
BlockScheduling &BS = *BSRef.get();
- if (!BS.tryScheduleBundle(VL, this, S)) {
+ Optional<ScheduleData *> Bundle = BS.tryScheduleBundle(VL, this, S);
+ if (!Bundle) {
LLVM_DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n");
assert((!BS.getScheduleData(VL0) ||
!BS.getScheduleData(VL0)->isPartOfBundle()) &&
"tryScheduleBundle should cancelScheduling on failure");
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
return;
}
LLVM_DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
LLVM_DEBUG(dbgs()
<< "SLP: Need to swizzle PHINodes (terminator use).\n");
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
return;
}
}
- auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ TreeEntry *TE =
+ newTreeEntry(VL, Bundle, UserTreeIdx, ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
+ // Keeps the reordered operands to avoid code duplication.
+ SmallVector<ValueList, 2> OperandsVec;
for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
ValueList Operands;
// Prepare the operand vector.
for (Value *j : VL)
Operands.push_back(cast<PHINode>(j)->getIncomingValueForBlock(
PH->getIncomingBlock(i)));
-
- buildTree_rec(Operands, Depth + 1, {TE, i});
+ TE->setOperand(i, Operands);
+ OperandsVec.push_back(Operands);
}
+ for (unsigned OpIdx = 0, OpE = OperandsVec.size(); OpIdx != OpE; ++OpIdx)
+ buildTree_rec(OperandsVec[OpIdx], Depth + 1, {TE, OpIdx});
return;
}
case Instruction::ExtractValue:
if (Reuse) {
LLVM_DEBUG(dbgs() << "SLP: Reusing or shuffling extract sequence.\n");
++NumOpsWantToKeepOriginalOrder;
- newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx,
+ newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
ReuseShuffleIndicies);
// This is a special case, as it does not gather, but at the same time
// we are not extending buildTree_rec() towards the operands.
ValueList Op0;
Op0.assign(VL.size(), VL0->getOperand(0));
- VectorizableTree.back()->setOperand(0, Op0, ReuseShuffleIndicies);
+ VectorizableTree.back()->setOperand(0, Op0);
return;
}
if (!CurrentOrder.empty()) {
auto StoredCurrentOrderAndNum =
NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;
++StoredCurrentOrderAndNum->getSecond();
- newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, ReuseShuffleIndicies,
+ newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies,
StoredCurrentOrderAndNum->getFirst());
// This is a special case, as it does not gather, but at the same time
// we are not extending buildTree_rec() towards the operands.
ValueList Op0;
Op0.assign(VL.size(), VL0->getOperand(0));
- VectorizableTree.back()->setOperand(0, Op0, ReuseShuffleIndicies);
+ VectorizableTree.back()->setOperand(0, Op0);
return;
}
LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n");
- newTreeEntry(VL, /*Vectorized=*/false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
BS.cancelScheduling(VL, VL0);
return;
}
if (DL->getTypeSizeInBits(ScalarTy) !=
DL->getTypeAllocSizeInBits(ScalarTy)) {
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
return;
}
auto *L = cast<LoadInst>(V);
if (!L->isSimple()) {
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
return;
}
if (CurrentOrder.empty()) {
// Original loads are consecutive and do not require reordering.
++NumOpsWantToKeepOriginalOrder;
- newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx,
- ReuseShuffleIndicies);
+ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
+ TE->setOperandsInOrder();
LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
} else {
// Need to reorder.
auto I = NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;
++I->getSecond();
- newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx,
- ReuseShuffleIndicies, I->getFirst());
+ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies, I->getFirst());
+ TE->setOperandsInOrder();
LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
}
return;
LLVM_DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
return;
}
case Instruction::ZExt:
Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType();
if (Ty != SrcTy || !isValidElementType(Ty)) {
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs()
<< "SLP: Gathering casts with different src types.\n");
return;
}
}
- auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
+ TE->setOperandsInOrder();
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
ValueList Operands;
// Prepare the operand vector.
if ((Cmp->getPredicate() != P0 && Cmp->getPredicate() != SwapP0) ||
Cmp->getOperand(0)->getType() != ComparedTy) {
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs()
<< "SLP: Gathering cmp with different predicate.\n");
return;
}
}
- auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
ValueList Left, Right;
Right.push_back(RHS);
}
}
-
+ TE->setOperand(0, Left);
+ TE->setOperand(1, Right);
buildTree_rec(Left, Depth + 1, {TE, 0});
buildTree_rec(Right, Depth + 1, {TE, 1});
return;
case Instruction::And:
case Instruction::Or:
case Instruction::Xor: {
- auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
// Sort operands of the instructions so that each side is more likely to
if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
ValueList Left, Right;
reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
+ TE->setOperand(0, Left);
+ TE->setOperand(1, Right);
buildTree_rec(Left, Depth + 1, {TE, 0});
buildTree_rec(Right, Depth + 1, {TE, 1});
return;
}
+ TE->setOperandsInOrder();
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
ValueList Operands;
// Prepare the operand vector.
if (cast<Instruction>(VL[j])->getNumOperands() != 2) {
LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
return;
}
}
LLVM_DEBUG(dbgs()
<< "SLP: not-vectorizable GEP (different types).\n");
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
return;
}
}
LLVM_DEBUG(dbgs()
<< "SLP: not-vectorizable GEP (non-constant indexes).\n");
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
return;
}
}
- auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
+ TE->setOperandsInOrder();
for (unsigned i = 0, e = 2; i < e; ++i) {
ValueList Operands;
// Prepare the operand vector.
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) {
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
return;
}
- auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
ValueList Operands;
for (Value *j : VL)
Operands.push_back(cast<Instruction>(j)->getOperand(0));
-
+ TE->setOperandsInOrder();
buildTree_rec(Operands, Depth + 1, {TE, 0});
return;
}
Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
if (!isTriviallyVectorizable(ID)) {
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
return;
}
getVectorIntrinsicIDForCall(CI2, TLI) != ID ||
!CI->hasIdenticalOperandBundleSchema(*CI2)) {
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i]
<< "\n");
return;
Value *A1J = CI2->getArgOperand(j);
if (ScalarArgs[j] != A1J) {
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
<< " argument " << ScalarArgs[j] << "!=" << A1J
<< "\n");
CI->op_begin() + CI->getBundleOperandsEndIndex(),
CI2->op_begin() + CI2->getBundleOperandsStartIndex())) {
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:"
<< *CI << "!=" << *VL[i] << '\n');
return;
}
}
- auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
+ TE->setOperandsInOrder();
for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
ValueList Operands;
// Prepare the operand vector.
// then do not vectorize this instruction.
if (!S.isAltShuffle()) {
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
return;
}
- auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
// Reorder operands if reordering would enable vectorization.
if (isa<BinaryOperator>(VL0)) {
ValueList Left, Right;
reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE);
+ TE->setOperand(0, Left);
+ TE->setOperand(1, Right);
buildTree_rec(Left, Depth + 1, {TE, 0});
buildTree_rec(Right, Depth + 1, {TE, 1});
return;
}
+ TE->setOperandsInOrder();
for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
ValueList Operands;
// Prepare the operand vector.
}
default:
BS.cancelScheduling(VL, VL0);
- newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+ newTreeEntry(VL, None /*not vectorized*/, UserTreeIdx,
+ ReuseShuffleIndicies);
LLVM_DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
return;
}
// Groups the instructions into a bundle (which is then a single scheduling entity)
// and schedules instructions until the bundle gets ready.
-bool BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL,
- BoUpSLP *SLP,
- const InstructionsState &S) {
+Optional<BoUpSLP::ScheduleData *>
+BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
+ const InstructionsState &S) {
if (isa<PHINode>(S.OpValue))
- return true;
+ return nullptr;
// Initialize the instruction bundle.
Instruction *OldScheduleEnd = ScheduleEnd;
// instructions of the bundle.
for (Value *V : VL) {
if (!extendSchedulingRegion(V, S))
- return false;
+ return None;
}
for (Value *V : VL) {
}
if (!Bundle->isReady()) {
cancelScheduling(VL, S.OpValue);
- return false;
+ return None;
}
- return true;
+ return Bundle;
}
void BoUpSLP::BlockScheduling::cancelScheduling(ArrayRef<Value *> VL,