/// Additional properties of an operand's values.
enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
- /// \return the number of registers in the target-provided register class.
- unsigned getNumberOfRegisters(unsigned ClassID) const;
-
- /// \return the target-provided register class ID for the provided type,
- /// accounting for type promotion and other type-legalization techniques that the target might apply.
- /// However, it specifically does not account for the scalarization or splitting of vector types.
- /// Should a vector type require scalarization or splitting into multiple underlying vector registers,
- /// that type should be mapped to a register class containing no registers.
- /// Specifically, this is designed to provide a simple, high-level view of the register allocation
- /// later performed by the backend. These register classes don't necessarily map onto the
- /// register classes used by the backend.
- /// FIXME: It's not currently possible to determine how many registers
- /// are used by the provided type.
- unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
-
- /// \return the target-provided register class name
- const char* getRegisterClassName(unsigned ClassID) const;
+ /// \return The number of scalar or vector registers that the target has.
+ /// If 'Vector' is true, it returns the number of vector registers. If it is
+ /// set to false, it returns the number of scalar registers.
+ unsigned getNumberOfRegisters(bool Vector) const;
/// \return The width of the largest scalar or vector register type.
unsigned getRegisterBitWidth(bool Vector) const;
Type *Ty) = 0;
virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
Type *Ty) = 0;
- virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
- virtual unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const = 0;
- virtual const char* getRegisterClassName(unsigned ClassID) const = 0;
+ virtual unsigned getNumberOfRegisters(bool Vector) = 0;
virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
virtual unsigned getMinVectorRegisterBitWidth() = 0;
virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
Type *Ty) override {
return Impl.getIntImmCost(IID, Idx, Imm, Ty);
}
- unsigned getNumberOfRegisters(unsigned ClassID) const override {
- return Impl.getNumberOfRegisters(ClassID);
- }
- unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const override {
- return Impl.getRegisterClassForType(Vector, Ty);
- }
- const char* getRegisterClassName(unsigned ClassID) const override {
- return Impl.getRegisterClassName(ClassID);
+ unsigned getNumberOfRegisters(bool Vector) override {
+ return Impl.getNumberOfRegisters(Vector);
}
unsigned getRegisterBitWidth(bool Vector) const override {
return Impl.getRegisterBitWidth(Vector);
return TTI::TCC_Free;
}
- unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; }
-
- unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const {
- return Vector ? 1 : 0;
- };
-
- const char* getRegisterClassName(unsigned ClassID) const {
- switch (ClassID) {
- default:
- return "Generic::Unknown Register Class";
- case 0: return "Generic::ScalarRC";
- case 1: return "Generic::VectorRC";
- }
- }
+ unsigned getNumberOfRegisters(bool Vector) { return 8; } // Default: 8 registers for both scalar and vector queries; Vector is ignored.
unsigned getRegisterBitWidth(bool Vector) const { return 32; }
/// \name Vector TTI Implementations
/// @{
+ unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; } // Reports no vector registers and a single scalar register.
+
unsigned getRegisterBitWidth(bool Vector) const { return 32; }
/// Estimate the overhead of scalarizing an instruction. Insert and Extract
return Cost;
}
-unsigned TargetTransformInfo::getNumberOfRegisters(unsigned ClassID) const {
- return TTIImpl->getNumberOfRegisters(ClassID);
-}
-
-unsigned TargetTransformInfo::getRegisterClassForType(bool Vector, Type *Ty) const {
- return TTIImpl->getRegisterClassForType(Vector, Ty);
-}
-
-const char* TargetTransformInfo::getRegisterClassName(unsigned ClassID) const {
- return TTIImpl->getRegisterClassName(ClassID);
+unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const { // Vector selects vector (true) or scalar (false) registers.
+ return TTIImpl->getNumberOfRegisters(Vector); // Pure forwarding: the answer comes from TTIImpl.
}
unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
bool enableInterleavedAccessVectorization() { return true; }
- unsigned getNumberOfRegisters(unsigned ClassID) const {
- bool Vector = (ClassID == 1);
+ unsigned getNumberOfRegisters(bool Vector) {
if (Vector) {
if (ST->hasNEON())
return 32;
/// \name Vector TTI Implementations
/// @{
- unsigned getNumberOfRegisters(unsigned ClassID) const {
- bool Vector = (ClassID == 1);
+ unsigned getNumberOfRegisters(bool Vector) {
if (Vector) {
if (ST->hasNEON())
return 16;
return true;
}
-unsigned PPCTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
- assert(ClassID == GPRRC || ClassID == FPRRC ||
- ClassID == VRRC || ClassID == VSXRC);
- if (ST->hasVSX()) {
- assert(ClassID == GPRRC || ClassID == VSXRC);
- return ClassID == GPRRC ? 32 : 64;
- }
- assert(ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC);
- return 32;
-}
-
-unsigned PPCTTIImpl::getRegisterClassForType(bool Vector, Type *Ty) const {
- if (Vector)
- return ST->hasVSX() ? VSXRC : VRRC;
- else if (Ty && Ty->getScalarType()->isFloatTy())
- return ST->hasVSX() ? VSXRC : FPRRC;
- else
- return GPRRC;
-}
-
-const char* PPCTTIImpl::getRegisterClassName(unsigned ClassID) const {
-
- switch (ClassID) {
- default:
- llvm_unreachable("unknown register class");
- return "PPC::unknown register class";
- case GPRRC: return "PPC::GPRRC";
- case FPRRC: return "PPC::FPRRC";
- case VRRC: return "PPC::VRRC";
- case VSXRC: return "PPC::VSXRC";
- }
+unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) { // Vector selects vector (true) or scalar (false) registers.
+ if (Vector && !ST->hasAltivec() && !ST->hasQPX()) // Vector query on a subtarget with neither Altivec nor QPX.
+ return 0;
+ return ST->hasVSX() ? 64 : 32; // Same answer for scalar and vector queries: 64 with VSX, otherwise 32.
}
unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const {
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const;
bool enableInterleavedAccessVectorization();
-
- enum PPCRegisterClass {
- GPRRC, FPRRC, VRRC, VSXRC
- };
- unsigned getNumberOfRegisters(unsigned ClassID) const;
- unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
- const char* getRegisterClassName(unsigned ClassID) const;
+ unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector) const;
unsigned getCacheLineSize();
unsigned getPrefetchDistance();
C2.ScaleCost, C2.SetupCost);
}
-unsigned SystemZTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
- bool Vector = (ClassID == 1);
+unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) {
if (!Vector)
// Discount the stack pointer. Also leave out %r0, since it can't
// be used in an address.
/// \name Vector TTI Implementations
/// @{
- unsigned getNumberOfRegisters(unsigned ClassID) const;
+ unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector) const;
unsigned getCacheLineSize() { return 256; }
return TargetTransformInfo::PSK_FastHardware;
}
-unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
- unsigned Result = BaseT::getNumberOfRegisters(ClassID);
+unsigned WebAssemblyTTIImpl::getNumberOfRegisters(bool Vector) {
+ unsigned Result = BaseT::getNumberOfRegisters(Vector);
// For SIMD, use at least 16 registers, as a rough guess.
- bool Vector = (ClassID == 1);
if (Vector)
Result = std::max(Result, 16u);
/// \name Vector TTI Implementations
/// @{
- unsigned getNumberOfRegisters(unsigned ClassID) const;
+ unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector) const;
unsigned getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
}
-unsigned X86TTIImpl::getNumberOfRegisters(unsigned ClassID) const {
- bool Vector = (ClassID == 1);
+unsigned X86TTIImpl::getNumberOfRegisters(bool Vector) {
if (Vector && !ST->hasSSE1())
return 0;
/// \name Vector TTI Implementations
/// @{
- unsigned getNumberOfRegisters(unsigned ClassID) const;
+ unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector) const;
unsigned getLoadStoreVecRegBitWidth(unsigned AS) const;
unsigned getMaxInterleaveFactor(unsigned VF);
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()),
TLI(ST->getTargetLowering()) {}
- unsigned getNumberOfRegisters(unsigned ClassID) const {
- bool Vector = (ClassID == 1);
+ unsigned getNumberOfRegisters(bool Vector) {
if (Vector) {
return 0;
}
// Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as
// additional instruction (at least fill).
- // TODO: Need distinguish register class?
- unsigned TTIRegNum = TTI->getNumberOfRegisters(
- TTI->getRegisterClassForType(false, F.getType())) - 1;
+ unsigned TTIRegNum = TTI->getNumberOfRegisters(false) - 1;
if (C.NumRegs > TTIRegNum) {
// Cost already exceeded TTIRegNum, then only newly added register can add
// new instructions.
/// of a loop.
struct RegisterUsage {
/// Holds the number of loop invariant values that are used in the loop.
- /// The key is ClassID of target-provided register class.
- SmallMapVector<unsigned, unsigned, 4> LoopInvariantRegs;
+ unsigned LoopInvariantRegs;
+
/// Holds the maximum number of concurrent live intervals in the loop.
- /// The key is ClassID of target-provided register class.
- SmallMapVector<unsigned, unsigned, 4> MaxLocalUsers;
+ unsigned MaxLocalUsers;
};
/// \return Returns information about the register usages of the loop for the
// Select the largest VF which doesn't require more registers than existing
// ones.
+ unsigned TargetNumRegisters = TTI.getNumberOfRegisters(true);
for (int i = RUs.size() - 1; i >= 0; --i) {
- bool Selected = true;
- for (auto& pair : RUs[i].MaxLocalUsers) {
- unsigned TargetNumRegisters = TTI.getNumberOfRegisters(pair.first);
- if (pair.second > TargetNumRegisters)
- Selected = false;
- }
- if (Selected) {
+ if (RUs[i].MaxLocalUsers <= TargetNumRegisters) {
MaxVF = VFs[i];
break;
}
if (TC > 1 && TC < TinyTripCountInterleaveThreshold)
return 1;
+ unsigned TargetNumRegisters = TTI.getNumberOfRegisters(VF > 1);
+ LLVM_DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters
+ << " registers\n");
+
+ if (VF == 1) {
+ if (ForceTargetNumScalarRegs.getNumOccurrences() > 0)
+ TargetNumRegisters = ForceTargetNumScalarRegs;
+ } else {
+ if (ForceTargetNumVectorRegs.getNumOccurrences() > 0)
+ TargetNumRegisters = ForceTargetNumVectorRegs;
+ }
+
RegisterUsage R = calculateRegisterUsage({VF})[0];
// We divide by these constants so assume that we have at least one
// instruction that uses at least one register.
- for (auto& pair : R.MaxLocalUsers) {
- pair.second = std::max(pair.second, 1U);
- }
+ R.MaxLocalUsers = std::max(R.MaxLocalUsers, 1U);
// We calculate the interleave count using the following formula.
// Subtract the number of loop invariants from the number of available
// We also want power of two interleave counts to ensure that the induction
// variable of the vector loop wraps to zero, when tail is folded by masking;
// this currently happens when OptForSize, in which case IC is set to 1 above.
- unsigned IC = UINT_MAX;
+ unsigned IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs) /
+ R.MaxLocalUsers);
- for (auto& pair : R.MaxLocalUsers) {
- unsigned TargetNumRegisters = TTI.getNumberOfRegisters(pair.first);
- LLVM_DEBUG(dbgs() << "LV: The target has " << TargetNumRegisters
- << " registers of "
- << TTI.getRegisterClassName(pair.first) << " register class\n");
- if (VF == 1) {
- if (ForceTargetNumScalarRegs.getNumOccurrences() > 0)
- TargetNumRegisters = ForceTargetNumScalarRegs;
- } else {
- if (ForceTargetNumVectorRegs.getNumOccurrences() > 0)
- TargetNumRegisters = ForceTargetNumVectorRegs;
- }
- unsigned MaxLocalUsers = pair.second;
- unsigned LoopInvariantRegs = 0;
- if (R.LoopInvariantRegs.find(pair.first) != R.LoopInvariantRegs.end())
- LoopInvariantRegs = R.LoopInvariantRegs[pair.first];
-
- unsigned TmpIC = PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs) / MaxLocalUsers);
- // Don't count the induction variable as interleaved.
- if (EnableIndVarRegisterHeur) {
- TmpIC =
- PowerOf2Floor((TargetNumRegisters - LoopInvariantRegs - 1) /
- std::max(1U, (MaxLocalUsers - 1)));
- }
-
- IC = std::min(IC, TmpIC);
- }
+ // Don't count the induction variable as interleaved.
+ if (EnableIndVarRegisterHeur)
+ IC = PowerOf2Floor((TargetNumRegisters - R.LoopInvariantRegs - 1) /
+ std::max(1U, (R.MaxLocalUsers - 1)));
// Clamp the interleave ranges to reasonable counts.
unsigned MaxInterleaveCount = TTI.getMaxInterleaveFactor(VF);
const DataLayout &DL = TheFunction->getParent()->getDataLayout();
SmallVector<RegisterUsage, 8> RUs(VFs.size());
- SmallVector<SmallMapVector<unsigned, unsigned, 4>, 8> MaxUsages(VFs.size());
+ SmallVector<unsigned, 8> MaxUsages(VFs.size(), 0);
LLVM_DEBUG(dbgs() << "LV(REG): Calculating max register usage:\n");
// For each VF find the maximum usage of registers.
for (unsigned j = 0, e = VFs.size(); j < e; ++j) {
- // Count the number of live intervals.
- SmallMapVector<unsigned, unsigned, 4> RegUsage;
-
if (VFs[j] == 1) {
- for (auto Inst : OpenIntervals) {
- unsigned ClassID = TTI.getRegisterClassForType(false, Inst->getType());
- if (RegUsage.find(ClassID) == RegUsage.end())
- RegUsage[ClassID] = 1;
- else
- RegUsage[ClassID] += 1;
- }
- } else {
- collectUniformsAndScalars(VFs[j]);
- for (auto Inst : OpenIntervals) {
- // Skip ignored values for VF > 1.
- if (VecValuesToIgnore.find(Inst) != VecValuesToIgnore.end())
- continue;
- if (isScalarAfterVectorization(Inst, VFs[j])) {
- unsigned ClassID = TTI.getRegisterClassForType(false, Inst->getType());
- if (RegUsage.find(ClassID) == RegUsage.end())
- RegUsage[ClassID] = 1;
- else
- RegUsage[ClassID] += 1;
- } else {
- unsigned ClassID = TTI.getRegisterClassForType(true, Inst->getType());
- if (RegUsage.find(ClassID) == RegUsage.end())
- RegUsage[ClassID] = GetRegUsage(Inst->getType(), VFs[j]);
- else
- RegUsage[ClassID] += GetRegUsage(Inst->getType(), VFs[j]);
- }
- }
+ MaxUsages[j] = std::max(MaxUsages[j], OpenIntervals.size());
+ continue;
}
-
- for (auto& pair : RegUsage) {
- if (MaxUsages[j].find(pair.first) != MaxUsages[j].end())
- MaxUsages[j][pair.first] = std::max(MaxUsages[j][pair.first], pair.second);
- else
- MaxUsages[j][pair.first] = pair.second;
+ collectUniformsAndScalars(VFs[j]);
+ // Count the number of live intervals.
+ unsigned RegUsage = 0;
+ for (auto Inst : OpenIntervals) {
+ // Skip ignored values and values that remain scalar for VF > 1.
+ if (VecValuesToIgnore.find(Inst) != VecValuesToIgnore.end() ||
+ isScalarAfterVectorization(Inst, VFs[j]))
+ continue;
+ RegUsage += GetRegUsage(Inst->getType(), VFs[j]);
}
+ MaxUsages[j] = std::max(MaxUsages[j], RegUsage);
}
LLVM_DEBUG(dbgs() << "LV(REG): At #" << i << " Interval # "
}
for (unsigned i = 0, e = VFs.size(); i < e; ++i) {
- SmallMapVector<unsigned, unsigned, 4> Invariant;
-
- for (auto Inst : LoopInvariants) {
- unsigned Usage = VFs[i] == 1 ? 1 : GetRegUsage(Inst->getType(), VFs[i]);
- unsigned ClassID = TTI.getRegisterClassForType(VFs[i] > 1, Inst->getType());
- if (Invariant.find(ClassID) == Invariant.end())
- Invariant[ClassID] = Usage;
- else
- Invariant[ClassID] += Usage;
+ unsigned Invariant = 0;
+ if (VFs[i] == 1)
+ Invariant = LoopInvariants.size();
+ else {
+ for (auto Inst : LoopInvariants)
+ Invariant += GetRegUsage(Inst->getType(), VFs[i]);
}
LLVM_DEBUG(dbgs() << "LV(REG): VF = " << VFs[i] << '\n');
- LLVM_DEBUG(dbgs() << "LV(REG): Found max usage: "
- << MaxUsages[i].size() << " item\n");
- for (const auto& Pair : MaxUsages[i]) {
- (void)Pair;
- LLVM_DEBUG(dbgs() << "LV(REG): RegisterClass: "
- << TTI.getRegisterClassName(Pair.first)
- << ", " << Pair.second << " registers \n");
- }
- LLVM_DEBUG(dbgs() << "LV(REG): Found invariant usage: "
- << Invariant.size() << " item\n");
- for (const auto& Pair : Invariant) {
- (void)Pair;
- LLVM_DEBUG(dbgs() << "LV(REG): RegisterClass: "
- << TTI.getRegisterClassName(Pair.first)
- << ", " << Pair.second << " registers \n");
- }
+ LLVM_DEBUG(dbgs() << "LV(REG): Found max usage: " << MaxUsages[i] << '\n');
+ LLVM_DEBUG(dbgs() << "LV(REG): Found invariant usage: " << Invariant
+ << '\n');
RU.LoopInvariantRegs = Invariant;
RU.MaxLocalUsers = MaxUsages[i];
// The second condition is necessary because, even if the target has no
// vector registers, loop vectorization may still enable scalar
// interleaving.
- if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true)) &&
- TTI->getMaxInterleaveFactor(1) < 2)
+ if (!TTI->getNumberOfRegisters(true) && TTI->getMaxInterleaveFactor(1) < 2)
return false;
bool Changed = false;
// If the target claims to have no vector registers don't attempt
// vectorization.
- if (!TTI->getNumberOfRegisters(TTI->getRegisterClassForType(true)))
+ if (!TTI->getNumberOfRegisters(true))
return false;
// Don't vectorize when the attribute NoImplicitFloat is used.
+++ /dev/null
-; RUN: opt < %s -debug-only=loop-vectorize -loop-vectorize -vectorizer-maximize-bandwidth -O2 -mtriple=powerpc64-unknown-linux -S -mcpu=pwr8 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR8
-; RUN: opt < %s -debug-only=loop-vectorize -loop-vectorize -vectorizer-maximize-bandwidth -O2 -mtriple=powerpc64le-unknown-linux -S -mcpu=pwr9 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR9
-; REQUIRES: asserts
-
-@a = global [1024 x i8] zeroinitializer, align 16
-@b = global [1024 x i8] zeroinitializer, align 16
-
-define i32 @foo() {
-;
-; CHECK-LABEL: foo
-
-; CHECK: LV(REG): VF = 8
-; CHECK-NEXT: LV(REG): Found max usage: 2 item
-; CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
-; CHECK-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 7 registers
-; CHECK-NEXT: LV(REG): Found invariant usage: 0 item
-; CHECK: LV(REG): VF = 16
-; CHECK-NEXT: LV(REG): Found max usage: 2 item
-; CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
-; CHECK-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 13 registers
-; CHECK-NEXT: LV(REG): Found invariant usage: 0 item
-
-; CHECK-PWR8: LV(REG): VF = 16
-; CHECK-PWR8-NEXT: LV(REG): Found max usage: 2 item
-; CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
-; CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 13 registers
-; CHECK-PWR8-NEXT: LV(REG): Found invariant usage: 0 item
-; CHECK-PWR8: Setting best plan to VF=16, UF=4
-
-; CHECK-PWR9: LV(REG): VF = 8
-; CHECK-PWR9-NEXT: LV(REG): Found max usage: 2 item
-; CHECK-PWR9-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
-; CHECK-PWR9-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 7 registers
-; CHECK-PWR9-NEXT: LV(REG): Found invariant usage: 0 item
-; CHECK-PWR9: Setting best plan to VF=8, UF=8
-
-
-entry:
- br label %for.body
-
-for.cond.cleanup:
- %add.lcssa = phi i32 [ %add, %for.body ]
- ret i32 %add.lcssa
-
-for.body:
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ]
- %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %indvars.iv
- %0 = load i8, i8* %arrayidx, align 1
- %conv = zext i8 %0 to i32
- %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %indvars.iv
- %1 = load i8, i8* %arrayidx2, align 1
- %conv3 = zext i8 %1 to i32
- %sub = sub nsw i32 %conv, %conv3
- %ispos = icmp sgt i32 %sub, -1
- %neg = sub nsw i32 0, %sub
- %2 = select i1 %ispos, i32 %sub, i32 %neg
- %add = add nsw i32 %2, %s.015
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp eq i64 %indvars.iv.next, 1024
- br i1 %exitcond, label %for.cond.cleanup, label %for.body
-}
-
-define i32 @goo() {
-; For indvars.iv used in a computating chain only feeding into getelementptr or cmp,
-; it will not have vector version and the vector register usage will not exceed the
-; available vector register number.
-; CHECK-LABEL: goo
-; CHECK: LV(REG): VF = 8
-; CHECK-NEXT: LV(REG): Found max usage: 2 item
-; CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
-; CHECK-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 7 registers
-; CHECK-NEXT: LV(REG): Found invariant usage: 0 item
-; CHECK: LV(REG): VF = 16
-; CHECK-NEXT: LV(REG): Found max usage: 2 item
-; CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
-; CHECK-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 13 registers
-; CHECK-NEXT: LV(REG): Found invariant usage: 0 item
-; CHECK: LV(REG): VF = 16
-; CHECK-NEXT: LV(REG): Found max usage: 2 item
-; CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
-; CHECK-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 13 registers
-; CHECK-NEXT: LV(REG): Found invariant usage: 0 item
-
-; CHECK: Setting best plan to VF=16, UF=4
-
-entry:
- br label %for.body
-
-for.cond.cleanup: ; preds = %for.body
- %add.lcssa = phi i32 [ %add, %for.body ]
- ret i32 %add.lcssa
-
-for.body: ; preds = %for.body, %entry
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ]
- %tmp1 = add nsw i64 %indvars.iv, 3
- %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %tmp1
- %tmp = load i8, i8* %arrayidx, align 1
- %conv = zext i8 %tmp to i32
- %tmp2 = add nsw i64 %indvars.iv, 2
- %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %tmp2
- %tmp3 = load i8, i8* %arrayidx2, align 1
- %conv3 = zext i8 %tmp3 to i32
- %sub = sub nsw i32 %conv, %conv3
- %ispos = icmp sgt i32 %sub, -1
- %neg = sub nsw i32 0, %sub
- %tmp4 = select i1 %ispos, i32 %sub, i32 %neg
- %add = add nsw i32 %tmp4, %s.015
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp eq i64 %indvars.iv.next, 1024
- br i1 %exitcond, label %for.cond.cleanup, label %for.body
-}
-
-define i64 @bar(i64* nocapture %a) {
-; CHECK-LABEL: bar
-; CHECK: LV(REG): VF = 2
-; CHECK-NEXT: LV(REG): Found max usage: 2 item
-; CHECK-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 3 registers
-; CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 1 registers
-; CHECK-NEXT: LV(REG): Found invariant usage: 0 item
-
-; CHECK: Setting best plan to VF=2, UF=12
-
-entry:
- br label %for.body
-
-for.cond.cleanup:
- %add2.lcssa = phi i64 [ %add2, %for.body ]
- ret i64 %add2.lcssa
-
-for.body:
- %i.012 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
- %s.011 = phi i64 [ 0, %entry ], [ %add2, %for.body ]
- %arrayidx = getelementptr inbounds i64, i64* %a, i64 %i.012
- %0 = load i64, i64* %arrayidx, align 8
- %add = add nsw i64 %0, %i.012
- store i64 %add, i64* %arrayidx, align 8
- %add2 = add nsw i64 %add, %s.011
- %inc = add nuw nsw i64 %i.012, 1
- %exitcond = icmp eq i64 %inc, 1024
- br i1 %exitcond, label %for.cond.cleanup, label %for.body
-}
-
-@d = external global [0 x i64], align 8
-@e = external global [0 x i32], align 4
-@c = external global [0 x i32], align 4
-
-define void @hoo(i32 %n) {
-; CHECK-LABEL: hoo
-; CHECK: LV(REG): VF = 4
-; CHECK-NEXT: LV(REG): Found max usage: 2 item
-; CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
-; CHECK-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 2 registers
-; CHECK-NEXT: LV(REG): Found invariant usage: 0 item
-; CHECK: LV(REG): VF = 1
-; CHECK-NEXT: LV(REG): Found max usage: 1 item
-; CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
-; CHECK-NEXT: LV(REG): Found invariant usage: 0 item
-; CHECK: Setting best plan to VF=1, UF=12
-
-entry:
- br label %for.body
-
-for.body: ; preds = %for.body, %entry
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds [0 x i64], [0 x i64]* @d, i64 0, i64 %indvars.iv
- %tmp = load i64, i64* %arrayidx, align 8
- %arrayidx1 = getelementptr inbounds [0 x i32], [0 x i32]* @e, i64 0, i64 %tmp
- %tmp1 = load i32, i32* %arrayidx1, align 4
- %arrayidx3 = getelementptr inbounds [0 x i32], [0 x i32]* @c, i64 0, i64 %indvars.iv
- store i32 %tmp1, i32* %arrayidx3, align 4
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp eq i64 %indvars.iv.next, 10000
- br i1 %exitcond, label %for.end, label %for.body
-
-for.end: ; preds = %for.body
- ret void
-}
target triple = "x86_64-unknown-linux-gnu"
; CHECK: LV: Checking a loop in "test_g"
-; CHECK: LV(REG): Found max usage: 2 item
-; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers
-; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers
-; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
-; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers
+; CHECK: LV(REG): Found max usage: 2
define i32 @test_g(i32* nocapture readonly %a, i32 %n) local_unnamed_addr !dbg !6 {
entry:
}
; CHECK: LV: Checking a loop in "test"
-; CHECK: LV(REG): Found max usage: 2 item
-; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers
-; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers
-; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
-; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers
+; CHECK: LV(REG): Found max usage: 2
define i32 @test(i32* nocapture readonly %a, i32 %n) local_unnamed_addr {
entry:
;
; CHECK-LABEL: foo
; CHECK: LV(REG): VF = 8
-; CHECK-NEXT: LV(REG): Found max usage: 2 item
-; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers
-; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 7 registers
-; CHECK-NEXT: LV(REG): Found invariant usage: 0 item
+; CHECK-NEXT: LV(REG): Found max usage: 7
; CHECK: LV(REG): VF = 16
-; CHECK-NEXT: LV(REG): Found max usage: 2 item
-; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers
-; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 13 registers
-; CHECK-NEXT: LV(REG): Found invariant usage: 0 item
+; CHECK-NEXT: LV(REG): Found max usage: 13
entry:
br label %for.body
; available vector register number.
; CHECK-LABEL: goo
; CHECK: LV(REG): VF = 8
-; CHECK-NEXT: LV(REG): Found max usage: 2 item
-; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers
-; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 7 registers
-; CHECK-NEXT: LV(REG): Found invariant usage: 0 item
+; CHECK-NEXT: LV(REG): Found max usage: 7
; CHECK: LV(REG): VF = 16
-; CHECK-NEXT: LV(REG): Found max usage: 2 item
-; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers
-; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 13 registers
-; CHECK-NEXT: LV(REG): Found invariant usage: 0 item
+; CHECK-NEXT: LV(REG): Found max usage: 13
entry:
br label %for.body
define i64 @bar(i64* nocapture %a) {
; CHECK-LABEL: bar
; CHECK: LV(REG): VF = 2
-; CHECK-NEXT: LV(REG): Found max usage: 2 item
-; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 3 registers
-; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 1 registers
-; CHECK-NEXT: LV(REG): Found invariant usage: 0 item
-
+; CHECK: LV(REG): Found max usage: 3
+;
entry:
br label %for.body
; so the max usage of AVX512 vector register will be 2.
; AVX512F-LABEL: bar
; AVX512F: LV(REG): VF = 16
-; AVX512F-CHECK: LV(REG): Found max usage: 2 item
-; AVX512F-CHECK: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers
-; AVX512F-CHECK: LV(REG): RegisterClass: Generic::VectorRC, 2 registers
-; AVX512F-CHECK: LV(REG): Found invariant usage: 0 item
-
+; AVX512F: LV(REG): Found max usage: 2
+;
entry:
br label %for.body