/// this target, for a load/store of the specified type.
/// The type may be VoidTy, in which case only return true if the addressing
/// mode is legal for a load/store of any legal type.
+ /// If the target returns true from LSRWithInstrQueries(), I may be a valid
+ /// Instruction pointer; otherwise it is nullptr.
/// TODO: Handle pre/postinc as well.
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,
- unsigned AddrSpace = 0) const;
+ unsigned AddrSpace = 0,
+ Instruction *I = nullptr) const;
/// \brief Return true if the LSR cost of C1 is lower than the cost of C2.
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) const;
+ /// \brief Return true if the loop strength reduce pass should make
+ /// Instruction*-based TTI queries to isLegalAddressingMode(). This is
+ /// needed on SystemZ, where e.g. a memcpy can only have a 12-bit unsigned
+ /// immediate offset and no index register.
+ bool LSRWithInstrQueries() const;
+
/// \brief Return true if the target supports the load / store
/// instruction with the given Offset in the form reg + Offset. It
/// may be that Offset is too big for a certain type (register
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
int64_t Scale,
- unsigned AddrSpace) = 0;
+ unsigned AddrSpace,
+ Instruction *I) = 0;
virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) = 0;
virtual bool isLegalMaskedStore(Type *DataType) = 0;
virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
int64_t Scale, unsigned AddrSpace) = 0;
+ virtual bool LSRWithInstrQueries() = 0;
virtual bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) = 0;
virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
virtual bool isProfitableToHoist(Instruction *I) = 0;
}
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,
- unsigned AddrSpace) override {
+ unsigned AddrSpace,
+ Instruction *I) override {
return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
- Scale, AddrSpace);
+ Scale, AddrSpace, I);
}
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) override {
return Impl.isLSRCostLess(C1, C2);
}
+ bool LSRWithInstrQueries() override {
+ return Impl.LSRWithInstrQueries();
+ }
bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) override {
return Impl.isFoldableMemAccessOffset(I, Offset);
}
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,
- unsigned AddrSpace) {
+ unsigned AddrSpace, Instruction *I = nullptr) {
// Guess that only reg and reg+reg addressing is allowed. This heuristic is
// taken from the implementation of LSR.
return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
}
+ bool LSRWithInstrQueries() { return false; }
+
bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) { return true; }
bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; }
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,
- unsigned AddrSpace) {
+ unsigned AddrSpace, Instruction *I = nullptr) {
TargetLoweringBase::AddrMode AM;
AM.BaseGV = BaseGV;
AM.BaseOffs = BaseOffset;
AM.HasBaseReg = HasBaseReg;
AM.Scale = Scale;
- return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace);
+ return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
}
bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
///
/// TODO: Remove default argument
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
- Type *Ty, unsigned AddrSpace) const;
+ Type *Ty, unsigned AddrSpace,
+ Instruction *I = nullptr) const;
/// \brief Return the cost of the scaling factor used in the addressing mode
/// represented by AM for this target, for a load/store of the specified type.
int64_t BaseOffset,
bool HasBaseReg,
int64_t Scale,
- unsigned AddrSpace) const {
+ unsigned AddrSpace,
+ Instruction *I) const {
return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
- Scale, AddrSpace);
+ Scale, AddrSpace, I);
}
bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
return Cost;
}
+bool TargetTransformInfo::LSRWithInstrQueries() const {
+ return TTIImpl->LSRWithInstrQueries();
+}
+
bool TargetTransformInfo::isFoldableMemAccessOffset(Instruction *I,
int64_t Offset) const {
return TTIImpl->isFoldableMemAccessOffset(I, Offset);
/// by AM is legal for this target, for a load/store of the specified type.
bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
- unsigned AS) const {
+ unsigned AS, Instruction *I) const {
// The default implementation of this implements a conservative RISCy, r+r and
// r+i addr mode.
/// by AM is legal for this target, for a load/store of the specified type.
bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
- unsigned AS) const {
+ unsigned AS, Instruction *I) const {
// AArch64 has five basic addressing modes:
// reg
// reg + 9-bit signed offset
/// Return true if the addressing mode represented by AM is legal for this
/// target, for a load/store of the specified type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
- unsigned AS) const override;
+ unsigned AS,
+ Instruction *I = nullptr) const override;
/// \brief Return the cost of the scaling factor used in the addressing
/// mode represented by AM for this target, for a load/store
bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
- unsigned AS) const {
+ unsigned AS, Instruction *I) const {
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
Type *&/*AccessTy*/) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
- unsigned AS) const override;
+ unsigned AS,
+ Instruction *I = nullptr) const override;
bool canMergeStoresTo(unsigned AS, EVT MemVT,
const SelectionDAG &DAG) const override;
/// by AM is legal for this target, for a load/store of the specified type.
bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
- unsigned AS) const {
+ unsigned AS, Instruction *I) const {
EVT VT = getValueType(DL, Ty, true);
if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
return false;
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
- Type *Ty, unsigned AS) const override;
+ Type *Ty, unsigned AS,
+ Instruction *I = nullptr) const override;
/// getScalingFactorCost - Return the cost of the scaling used in
/// addressing mode represented by AM.
/// by AM is legal for this target, for a load/store of the specified type.
bool AVRTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
- unsigned AS) const {
+ unsigned AS, Instruction *I) const {
int64_t Offs = AM.BaseOffs;
// Allow absolute addresses.
SelectionDAG &DAG) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
- unsigned AS) const override;
+ unsigned AS,
+ Instruction *I = nullptr) const override;
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
ISD::MemIndexedMode &AM,
/// AM is legal for this target, for a load/store of the specified type.
bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
- unsigned AS) const {
+ unsigned AS, Instruction *I) const {
if (Ty->isSized()) {
// When LSR detects uses of the same base address to access different
// types (e.g. unions), it will assume a conservative type for these
/// mode is legal for a load/store of any legal type.
/// TODO: Handle pre/postinc as well.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
- Type *Ty, unsigned AS) const override;
+ Type *Ty, unsigned AS,
+ Instruction *I = nullptr) const override;
/// Return true if folding a constant offset with the given GlobalAddress
/// is legal. It is frequently not legal in PIC relocation models.
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
bool MipsTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
- unsigned AS) const {
+ unsigned AS, Instruction *I) const {
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
}
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
- Type *Ty, unsigned AS) const override;
+ Type *Ty, unsigned AS,
+ Instruction *I = nullptr) const override;
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
/// (CodeGenPrepare.cpp)
bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
- unsigned AS) const {
+ unsigned AS, Instruction *I) const {
// AddrMode - This represents an addressing mode of:
// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
//
/// reduction (LoopStrengthReduce.cpp) and memory optimization for
/// address mode (CodeGenPrepare.cpp)
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
- unsigned AS) const override;
+ unsigned AS,
+ Instruction *I = nullptr) const override;
bool isTruncateFree(Type *SrcTy, Type *DstTy) const override {
// Truncating 64-bit to 32-bit is free in SASS.
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
- unsigned AS) const {
+ unsigned AS, Instruction *I) const {
// PPC does not allow r+i addressing modes for vectors!
if (Ty->isVectorTy() && AM.BaseOffs != 0)
return false;
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
- Type *Ty, unsigned AS) const override;
+ Type *Ty, unsigned AS,
+ Instruction *I = nullptr) const override;
/// isLegalICmpImmediate - Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can
return true;
}
+// Information about the addressing mode for a memory access.
+struct AddressingMode {
+ // True if a long displacement is supported.
+ bool LongDisplacement;
+
+ // True if use of index register is supported.
+ bool IndexReg;
+
+ AddressingMode(bool LongDispl, bool IdxReg) :
+ LongDisplacement(LongDispl), IndexReg(IdxReg) {}
+};
+
+// Return the desired addressing mode for a Load whose only use (in the same
+// block) is a Store.
+static AddressingMode getLoadStoreAddrMode(bool HasVector,
+ Type *Ty) {
+ // With vector support a Load->Store combination may be combined to either
+ // an MVC or vector operations and it seems to work best to allow the
+ // vector addressing mode.
+ if (HasVector)
+ return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
+
+ // Otherwise only the MVC case is special.
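+ // An i8 Load->Store pair is matched to MVC, which only supports a 12-bit
+ // unsigned displacement and no index register.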
+ bool MVC = Ty->isIntegerTy(8);
+ return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
+}
+
+// Return the addressing mode which seems most desirable given an LLVM
+// Instruction pointer.
+static AddressingMode
+supportedAddressingMode(Instruction *I, bool HasVector) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: break;
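+ // These intrinsics are expanded to code that only supports a 12-bit
+ // unsigned displacement and no index register (e.g. MVC for memcpy).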
+ case Intrinsic::memset:
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
+ }
+ }
+
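+ // A load whose only in-block user is a compare against a 16-bit immediate
+ // or a store may end up combined into a single memory instruction with
+ // restricted addressing.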
+ if (isa<LoadInst>(I) && I->hasOneUse()) {
+ auto *SingleUser = cast<Instruction>(*I->user_begin());
+ if (SingleUser->getParent() == I->getParent()) {
+ if (isa<ICmpInst>(SingleUser)) {
+ if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
+ if (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue()))
+ // Comparison of memory with 16 bit signed / unsigned immediate
+ return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
+ } else if (isa<StoreInst>(SingleUser))
+ // Load->Store
+ return getLoadStoreAddrMode(HasVector, I->getType());
+ }
+ } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
+ if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
+ if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
+ // Load->Store
+ return getLoadStoreAddrMode(HasVector, LoadI->getType());
+ }
+
+ if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
+
+ // * Use LDE instead of LE/LEY for z13 to avoid partial register
+ // dependencies (LDE only supports small offsets).
+ // * Utilize the vector registers to hold floating point
+ // values (vector load / store instructions only support small
+ // offsets).
+
+ Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
+ I->getOperand(0)->getType());
+ bool IsFPAccess = MemAccessTy->isFloatingPointTy();
+ bool IsVectorAccess = MemAccessTy->isVectorTy();
+
+ // A store of an extracted vector element will be combined into a VSTE type
+ // instruction.
+ if (!IsVectorAccess && isa<StoreInst>(I)) {
+ Value *DataOp = I->getOperand(0);
+ if (isa<ExtractElementInst>(DataOp))
+ IsVectorAccess = true;
+ }
+
+ // A load which gets inserted into a vector element will be combined into a
+ // VLE type instruction.
+ if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
+ User *LoadUser = *I->user_begin();
+ if (isa<InsertElementInst>(LoadUser))
+ IsVectorAccess = true;
+ }
+
+ if (IsFPAccess || IsVectorAccess)
+ return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
+ }
+
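+ // Otherwise assume the full addressing mode: a 20-bit signed displacement
+ // and an index register.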
+ return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
+}
+
+// TODO: This method should also check the displacement when I is passed. It
+// may also be possible to merge it with isFoldableMemAccessOffset() now that
+// both methods receive the instruction pointer.
bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
- const AddrMode &AM, Type *Ty,
- unsigned AS) const {
+ const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
// Punt on globals for now, although they can be used in limited
// RELATIVE LONG cases.
if (AM.BaseGV)
if (!isInt<20>(AM.BaseOffs))
return false;
- // Indexing is OK but no scale factor can be applied.
- return AM.Scale == 0 || AM.Scale == 1;
+ if (I != nullptr &&
+ !supportedAddressingMode(I, Subtarget.hasVector()).IndexReg)
+ // No indexing allowed.
+ return AM.Scale == 0;
+ // Indexing is OK but no scale factor can be applied.
+ return AM.Scale == 0 || AM.Scale == 1;
}
+// TODO: Should we check for isInt<20> also?
bool SystemZTargetLowering::isFoldableMemAccessOffset(Instruction *I,
int64_t Offset) const {
- // This only applies to z13.
- if (!Subtarget.hasVector())
- return true;
-
- // * Use LDE instead of LE/LEY to avoid partial register
- // dependencies (LDE only supports small offsets).
- // * Utilize the vector registers to hold floating point
- // values (vector load / store instructions only support small
- // offsets).
-
- assert (isa<LoadInst>(I) || isa<StoreInst>(I));
- Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
- I->getOperand(0)->getType());
- bool IsFPAccess = MemAccessTy->isFloatingPointTy();
- bool IsVectorAccess = MemAccessTy->isVectorTy();
-
- // A store of an extracted vector element will be combined into a VSTE type
- // instruction.
- if (!IsVectorAccess && isa<StoreInst>(I)) {
- Value *DataOp = I->getOperand(0);
- if (isa<ExtractElementInst>(DataOp))
- IsVectorAccess = true;
- }
-
- // A load which gets inserted into a vector element will be combined into a
- // VLE type instruction.
- if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
- User *LoadUser = *I->user_begin();
- if (isa<InsertElementInst>(LoadUser))
- IsVectorAccess = true;
- }
-
- if (!isUInt<12>(Offset) && (IsFPAccess || IsVectorAccess))
- return false;
+ if (!supportedAddressingMode(I, Subtarget.hasVector()).LongDisplacement)
+ return isUInt<12>(Offset);
return true;
}
bool isLegalICmpImmediate(int64_t Imm) const override;
bool isLegalAddImmediate(int64_t Imm) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
- unsigned AS) const override;
+ unsigned AS,
+ Instruction *I = nullptr) const override;
bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) const override;
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
unsigned Align,
UP.Force = true;
}
+
+bool SystemZTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+ TargetTransformInfo::LSRCost &C2) {
+ // SystemZ specific: check instruction count (first), and don't care about
+ // ImmCost, since offsets are checked explicitly.
+ return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost,
+ C1.NumIVMuls, C1.NumBaseAdds,
+ C1.ScaleCost, C1.SetupCost) <
+ std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost,
+ C2.NumIVMuls, C2.NumBaseAdds,
+ C2.ScaleCost, C2.SetupCost);
+}
+
unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) {
if (!Vector)
// Discount the stack pointer. Also leave out %r0, since it can't
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
TTI::UnrollingPreferences &UP);
+ bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+ TargetTransformInfo::LSRCost &C2);
/// @}
/// \name Vector TTI Implementations
unsigned getMinPrefetchStride() { return 2048; }
bool prefersVectorizedAddressing() { return false; }
+ bool LSRWithInstrQueries() { return true; }
bool supportsEfficientVectorElementLoadStore() { return true; }
bool enableInterleavedAccessVectorization() { return true; }
bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM,
Type *Ty,
- unsigned AS) const {
+ unsigned AS,
+ Instruction *I) const {
// WebAssembly offsets are added as unsigned without wrapping. The
// isLegalAddressingMode gives us no way to determine if wrapping could be
// happening, so we approximate this by accepting only non-negative offsets.
bool isCheapToSpeculateCttz() const override;
bool isCheapToSpeculateCtlz() const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
- unsigned AS) const override;
+ unsigned AS,
+ Instruction *I = nullptr) const override;
bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace, unsigned Align,
bool *Fast) const override;
bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
/// target, for a load/store of the specified type.
bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
- unsigned AS) const {
+ unsigned AS,
+ Instruction *I) const {
// X86 supports extremely general addressing modes.
CodeModel::Model M = getTargetMachine().getCodeModel();
/// Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
- Type *Ty, unsigned AS) const override;
+ Type *Ty, unsigned AS,
+ Instruction *I = nullptr) const override;
/// Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can
/// by AM is legal for this target, for a load/store of the specified type.
bool XCoreTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
- unsigned AS) const {
+ unsigned AS,
+ Instruction *I) const {
if (Ty->getTypeID() == Type::VoidTyID)
return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs);
MachineBasicBlock *MBB) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
- Type *Ty, unsigned AS) const override;
+ Type *Ty, unsigned AS,
+ Instruction *I = nullptr) const override;
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
// of intrinsics.
switch (II->getIntrinsicID()) {
default: break;
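+ // memset addresses memory only through its first argument, like prefetch.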
+ case Intrinsic::memset:
case Intrinsic::prefetch:
if (II->getArgOperand(0) == OperandVal)
isAddress = true;
break;
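+ // memmove and memcpy address memory through both their first (destination)
+ // and second (source) arguments.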
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ if (II->getArgOperand(0) == OperandVal ||
+ II->getArgOperand(1) == OperandVal)
+ isAddress = true;
+ break;
}
} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
if (RMW->getPointerOperand() == OperandVal)
// Check with target if this offset with this instruction is
// specifically not supported.
- if ((isa<LoadInst>(Fixup.UserInst) || isa<StoreInst>(Fixup.UserInst)) &&
+ if (LU.Kind == LSRUse::Address && Offset != 0 &&
!TTI.isFoldableMemAccessOffset(Fixup.UserInst, Offset))
C.NumBaseAdds++;
}
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
LSRUse::KindType Kind, MemAccessTy AccessTy,
GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale) {
+ bool HasBaseReg, int64_t Scale,
+ Instruction *Fixup = nullptr) {
switch (Kind) {
case LSRUse::Address:
return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, BaseOffset,
- HasBaseReg, Scale, AccessTy.AddrSpace);
+ HasBaseReg, Scale, AccessTy.AddrSpace, Fixup);
case LSRUse::ICmpZero:
// There's not even a target hook for querying whether it would be legal to
static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
const LSRUse &LU, const Formula &F) {
+ // Target may want to look at the user instructions.
+ if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {
+ for (const LSRFixup &Fixup : LU.Fixups)
+ if (!isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
+ F.BaseOffset, F.HasBaseReg, F.Scale,
+ Fixup.UserInst))
+ return false;
+ return true;
+ }
+
return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
F.Scale);
for.body.3.i: ; preds = %for.body.3.i, %for.body.3.lr.ph.i
; CHECK-LABEL: .LBB0_5:
; CHECK-NOT: stfh %r{{.*}}, 0(%r{{.*}})
-; CHECK: lg %r{{.*}}, -4(%r{{.*}})
+; CHECK: lg %r{{.*}}, 8(%r{{.*}})
; Overlapping load should go before the store
%indvars.iv.i = phi i64 [ 0, %for.body.3.lr.ph.i ], [ %indvars.iv.next.i, %for.body.3.i ]
%3 = shl nsw i64 %indvars.iv.i, 6
define void @f1(i32 *%dest, i32 %a) {
; CHECK-LABEL: f1:
; CHECK-NOT: sllg
-; CHECK: st %r3, 0({{%r[1-5],%r[1-5]}})
+; CHECK: st %r3, 400({{%r[1-5],%r[1-5]}})
; CHECK: br %r14
entry:
br label %loop
%exitcond = icmp eq i32 %lftr.wideiv, %S
br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}
+
+; Test that a memcpy loop does not get a lot of lays before each mvc (D12 and no index-reg).
+%0 = type { %1, %2* }
+%1 = type { %2*, %2* }
+%2 = type <{ %3, i32, [4 x i8] }>
+%3 = type { i16*, i16*, i16* }
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #0
+
+define void @f8() {
+; CHECK-Z13-LABEL: f8:
+; CHECK-Z13: mvc
+; CHECK-Z13-NEXT: mvc
+; CHECK-Z13-NEXT: mvc
+; CHECK-Z13-NEXT: mvc
+
+bb:
+ %tmp = load %0*, %0** undef, align 8
+ br i1 undef, label %bb2, label %bb1
+
+bb1: ; preds = %bb
+ br label %bb2
+
+bb2: ; preds = %bb1, %bb
+ %tmp3 = phi %0* [ %tmp, %bb ], [ undef, %bb1 ]
+ %tmp4 = phi %0* [ undef, %bb ], [ undef, %bb1 ]
+ br label %bb5
+
+bb5: ; preds = %bb5, %bb2
+ %tmp6 = phi %0* [ %tmp21, %bb5 ], [ %tmp3, %bb2 ]
+ %tmp7 = phi %0* [ %tmp20, %bb5 ], [ %tmp4, %bb2 ]
+ %tmp8 = getelementptr inbounds %0, %0* %tmp7, i64 -1
+ %tmp9 = getelementptr inbounds %0, %0* %tmp6, i64 -1
+ %tmp10 = bitcast %0* %tmp9 to i8*
+ %tmp11 = bitcast %0* %tmp8 to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp10, i8* %tmp11, i64 24, i32 8, i1 false)
+ %tmp12 = getelementptr inbounds %0, %0* %tmp7, i64 -2
+ %tmp13 = getelementptr inbounds %0, %0* %tmp6, i64 -2
+ %tmp14 = bitcast %0* %tmp13 to i8*
+ %tmp15 = bitcast %0* %tmp12 to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp15, i64 24, i32 8, i1 false)
+ %tmp16 = getelementptr inbounds %0, %0* %tmp7, i64 -3
+ %tmp17 = getelementptr inbounds %0, %0* %tmp6, i64 -3
+ %tmp18 = bitcast %0* %tmp17 to i8*
+ %tmp19 = bitcast %0* %tmp16 to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp18, i8* %tmp19, i64 24, i32 8, i1 false)
+ %tmp20 = getelementptr inbounds %0, %0* %tmp7, i64 -4
+ %tmp21 = getelementptr inbounds %0, %0* %tmp6, i64 -4
+ %tmp22 = bitcast %0* %tmp21 to i8*
+ %tmp23 = bitcast %0* %tmp20 to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp22, i8* %tmp23, i64 24, i32 8, i1 false)
+ br label %bb5
+}
+
+; Test that a chsi does not need an aghik inside the loop (no index reg)
+define void @f9() {
+; CHECK-Z13-LABEL: f9:
+; CHECK-Z13: # =>This Inner Loop Header: Depth=1
+; CHECK-Z13-NOT: aghik
+; CHECK-Z13: chsi
+
+entry:
+ br label %for.body.i63
+
+for.body.i63: ; preds = %for.inc.i, %entry
+ %indvars.iv155.i = phi i64 [ 0, %entry ], [ %indvars.iv.next156.i.3, %for.inc.i ]
+ %arrayidx.i62 = getelementptr inbounds i32, i32* undef, i64 %indvars.iv155.i
+ %tmp = load i32, i32* %arrayidx.i62, align 4
+ %cmp9.i = icmp eq i32 %tmp, 0
+ br i1 %cmp9.i, label %for.inc.i, label %if.then10.i
+
+if.then10.i: ; preds = %for.body.i63
+ unreachable
+
+for.inc.i: ; preds = %for.body.i63
+ %indvars.iv.next156.i = or i64 %indvars.iv155.i, 1
+ %arrayidx.i62.1 = getelementptr inbounds i32, i32* undef, i64 %indvars.iv.next156.i
+ %tmp1 = load i32, i32* %arrayidx.i62.1, align 4
+ %indvars.iv.next156.i.3 = add nsw i64 %indvars.iv155.i, 4
+ br label %for.body.i63
+}