From ed69aeeaad2cfea56c10be20d1a5677df443c83e Mon Sep 17 00:00:00 2001
From: Jonas Paulsson
Date: Fri, 21 Jul 2017 11:59:37 +0000
Subject: [PATCH] [SystemZ, LoopStrengthReduce] Make LSR generate better code
 for SystemZ in the presence of memory intrinsics, Load->Store pairs, and
 comparisons of immediates with memory.

In order to achieve this, the following common code changes were made:

 * New TTI hook: LSRWithInstrQueries(), which defaults to false. It controls
   whether LSR should do instruction-based addressing evaluations by calling
   isLegalAddressingMode() with the Instruction pointers.

 * In LoopStrengthReduce: handle the address operands of memset, memmove and
   memcpy as address uses, and call isFoldableMemAccessOffset() for any
   LSRUse::Address, not just loads or stores.

SystemZ changes:

 * isLSRCostLess() implemented with Insns first, and without ImmCost.

 * New function supportedAddressingMode() that is a helper for TTI methods
   looking at Instructions passed via pointers.

Review: Ulrich Weigand, Quentin Colombet
https://reviews.llvm.org/D35262
https://reviews.llvm.org/D35049

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@308729 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/Analysis/TargetTransformInfo.h   |  22 ++-
 .../llvm/Analysis/TargetTransformInfoImpl.h   |   4 +-
 include/llvm/CodeGen/BasicTTIImpl.h           |   4 +-
 include/llvm/Target/TargetLowering.h          |   3 +-
 lib/Analysis/TargetTransformInfo.cpp          |   9 +-
 lib/CodeGen/TargetLoweringBase.cpp            |   2 +-
 lib/Target/AArch64/AArch64ISelLowering.cpp    |   2 +-
 lib/Target/AArch64/AArch64ISelLowering.h      |   3 +-
 lib/Target/AMDGPU/SIISelLowering.cpp          |   2 +-
 lib/Target/AMDGPU/SIISelLowering.h            |   3 +-
 lib/Target/ARM/ARMISelLowering.cpp            |   2 +-
 lib/Target/ARM/ARMISelLowering.h              |   3 +-
 lib/Target/AVR/AVRISelLowering.cpp            |   2 +-
 lib/Target/AVR/AVRISelLowering.h              |   3 +-
 lib/Target/Hexagon/HexagonISelLowering.cpp    |   2 +-
 lib/Target/Hexagon/HexagonISelLowering.h      |   3 +-
 lib/Target/Mips/MipsISelLowering.cpp          |   2 +-
 lib/Target/Mips/MipsISelLowering.h            |   3 +-
 lib/Target/NVPTX/NVPTXISelLowering.cpp        |   2 +-
 lib/Target/NVPTX/NVPTXISelLowering.h          |   3 +-
 lib/Target/PowerPC/PPCISelLowering.cpp        |   2 +-
 lib/Target/PowerPC/PPCISelLowering.h          |   3 +-
 lib/Target/SystemZ/SystemZISelLowering.cpp    | 148 +++++++++++++-----
 lib/Target/SystemZ/SystemZISelLowering.h      |   3 +-
 .../SystemZ/SystemZTargetTransformInfo.cpp    |  13 ++
 .../SystemZ/SystemZTargetTransformInfo.h      |   3 +
 .../WebAssembly/WebAssemblyISelLowering.cpp   |   3 +-
 .../WebAssembly/WebAssemblyISelLowering.h     |   3 +-
 lib/Target/X86/X86ISelLowering.cpp            |   3 +-
 lib/Target/X86/X86ISelLowering.h              |   3 +-
 lib/Target/XCore/XCoreISelLowering.cpp        |   3 +-
 lib/Target/XCore/XCoreISelLowering.h          |   3 +-
 lib/Transforms/Scalar/LoopStrengthReduce.cpp  |  24 ++-
 test/CodeGen/SystemZ/dag-combine-01.ll        |   2 +-
 test/CodeGen/SystemZ/loop-01.ll               |  83 +++++++++-
 35 files changed, 301 insertions(+), 77 deletions(-)

diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h
index 24edd3826a2..6599aa89ef2 100644
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -420,10 +420,12 @@ public:
   /// this target, for a load/store of the specified type.
   /// The type may be VoidTy, in which case only return true if the addressing
   /// mode is legal for a load/store of any legal type.
+  /// If the target returns true from LSRWithInstrQueries(), I may be valid.
   /// TODO: Handle pre/postinc as well.
   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                              bool HasBaseReg, int64_t Scale,
-                             unsigned AddrSpace = 0) const;
+                             unsigned AddrSpace = 0,
+                             Instruction *I = nullptr) const;
 
   /// \brief Return true if LSR cost of C1 is lower than C2.
   bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                      TargetTransformInfo::LSRCost &C2) const;
@@ -453,6 +455,12 @@ public:
                              bool HasBaseReg, int64_t Scale,
                              unsigned AddrSpace = 0) const;
 
+  /// \brief Return true if the loop strength reduce pass should make
+  /// Instruction* based TTI queries to isLegalAddressingMode(). This is
+  /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
+  /// immediate offset and no index register.
+  bool LSRWithInstrQueries() const;
+
   /// \brief Return true if target supports the load / store
   /// instruction with the given Offset on the form reg + Offset. It
   /// may be that Offset is too big for a certain type (register
@@ -882,7 +890,8 @@ public:
   virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                      int64_t BaseOffset, bool HasBaseReg,
                                      int64_t Scale,
-                                     unsigned AddrSpace) = 0;
+                                     unsigned AddrSpace,
+                                     Instruction *I) = 0;
   virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                              TargetTransformInfo::LSRCost &C2) = 0;
   virtual bool isLegalMaskedStore(Type *DataType) = 0;
@@ -893,6 +902,7 @@ public:
   virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                    int64_t BaseOffset, bool HasBaseReg,
                                    int64_t Scale, unsigned AddrSpace) = 0;
+  virtual bool LSRWithInstrQueries() = 0;
   virtual bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) = 0;
   virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
   virtual bool isProfitableToHoist(Instruction *I) = 0;
@@ -1085,9 +1095,10 @@ public:
   }
   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                              bool HasBaseReg, int64_t Scale,
-                             unsigned AddrSpace) override {
+                             unsigned AddrSpace,
+                             Instruction *I) override {
     return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
-                                      Scale, AddrSpace);
+                                      Scale, AddrSpace, I);
   }
   bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                      TargetTransformInfo::LSRCost &C2) override {
@@ -1114,6 +1125,9 @@ public:
     return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
                                      Scale, AddrSpace);
   }
+  bool LSRWithInstrQueries() override {
+    return Impl.LSRWithInstrQueries();
+  }
   bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) override {
     return Impl.isFoldableMemAccessOffset(I, Offset);
   }
diff --git a/include/llvm/Analysis/TargetTransformInfoImpl.h b/include/llvm/Analysis/TargetTransformInfoImpl.h
index 0b07fe9aa23..cdb3949be1c 100644
--- a/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -230,7 +230,7 @@ public:
 
   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                              bool HasBaseReg, int64_t Scale,
-                             unsigned AddrSpace) {
+                             unsigned AddrSpace, Instruction *I = nullptr) {
     // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
     return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
@@ -262,6 +262,8 @@ public:
     return -1;
   }
 
+  bool LSRWithInstrQueries() { return false; }
+
   bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) { return true; }
 
   bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; }
diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h
index 63310702479..1e569632384 100644
--- a/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/include/llvm/CodeGen/BasicTTIImpl.h
@@ -110,13 +110,13 @@ public:
 
   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                              bool HasBaseReg, int64_t Scale,
-                             unsigned AddrSpace) {
+                             unsigned AddrSpace, Instruction *I = nullptr) {
     TargetLoweringBase::AddrMode AM;
     AM.BaseGV = BaseGV;
     AM.BaseOffs = BaseOffset;
     AM.HasBaseReg = HasBaseReg;
     AM.Scale = Scale;
-    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace);
+    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
   }
 
   bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 23711d636c9..525037903b2 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -1887,7 +1887,8 @@ public:
   ///
   /// TODO: Remove default argument
   virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
-                                     Type *Ty, unsigned AddrSpace) const;
+                                     Type *Ty, unsigned AddrSpace,
+                                     Instruction *I = nullptr) const;
 
   /// \brief Return the cost of the scaling factor used in the addressing mode
   /// represented by AM for this target, for a load/store of the specified type.
diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp
index 25813c65037..762760dd332 100644
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -144,9 +144,10 @@ bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                                 int64_t BaseOffset,
                                                 bool HasBaseReg,
                                                 int64_t Scale,
-                                                unsigned AddrSpace) const {
+                                                unsigned AddrSpace,
+                                                Instruction *I) const {
   return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
-                                        Scale, AddrSpace);
+                                        Scale, AddrSpace, I);
 }
 
 bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
@@ -184,6 +185,10 @@ int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
   return Cost;
 }
 
+bool TargetTransformInfo::LSRWithInstrQueries() const {
+  return TTIImpl->LSRWithInstrQueries();
+}
+
 bool TargetTransformInfo::isFoldableMemAccessOffset(Instruction *I,
                                                     int64_t Offset) const {
   return TTIImpl->isFoldableMemAccessOffset(I, Offset);
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 3b82d241c44..8be9e192e5b 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -1481,7 +1481,7 @@ Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
 /// by AM is legal for this target, for a load/store of the specified type.
 bool TargetLoweringBase::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
-                                               unsigned AS) const {
+                                               unsigned AS, Instruction *I) const {
   // The default implementation of this implements a conservative RISCy, r+r and
   // r+i addr mode.
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index c6150f9e5d1..05bab70c12a 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7818,7 +7818,7 @@ bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Immed) const {
 /// by AM is legal for this target, for a load/store of the specified type.
 bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                   const AddrMode &AM, Type *Ty,
-                                                  unsigned AS) const {
+                                                  unsigned AS, Instruction *I) const {
   // AArch64 has five basic addressing modes:
   //  reg
   //  reg + 9-bit signed offset
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 3b0e0f1de89..0eb80d03938 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -338,7 +338,8 @@ public:
   /// Return true if the addressing mode represented by AM is legal for this
   /// target, for a load/store of the specified type.
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
-                             unsigned AS) const override;
+                             unsigned AS,
+                             Instruction *I = nullptr) const override;
 
   /// \brief Return the cost of the scaling factor used in the addressing
   /// mode represented by AM for this target, for a load/store
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 2356405f091..6ca8cd104bd 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -624,7 +624,7 @@ bool SITargetLowering::isLegalMUBUFAddressingMode(const AddrMode &AM) const {
 
 bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
-                                             unsigned AS) const {
+                                             unsigned AS, Instruction *I) const {
   // No global is ever allowed as a base.
   if (AM.BaseGV)
     return false;
diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h
index e6bb3d6cd41..e0136e97a56 100644
--- a/lib/Target/AMDGPU/SIISelLowering.h
+++ b/lib/Target/AMDGPU/SIISelLowering.h
@@ -151,7 +151,8 @@ public:
                              Type *&/*AccessTy*/) const override;
 
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
-                             unsigned AS) const override;
+                             unsigned AS,
+                             Instruction *I = nullptr) const override;
 
   bool canMergeStoresTo(unsigned AS, EVT MemVT,
                         const SelectionDAG &DAG) const override;
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 6ba7593543a..6d9a32c9956 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -12380,7 +12380,7 @@ bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
 /// by AM is legal for this target, for a load/store of the specified type.
 bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                               const AddrMode &AM, Type *Ty,
-                                              unsigned AS) const {
+                                              unsigned AS, Instruction *I) const {
   EVT VT = getValueType(DL, Ty, true);
   if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
     return false;
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index f05b1425523..8d73b2ed75e 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -317,7 +317,8 @@ class InstrItineraryData;
 
     /// isLegalAddressingMode - Return true if the addressing mode represented
     /// by AM is legal for this target, for a load/store of the specified type.
     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
-                               Type *Ty, unsigned AS) const override;
+                               Type *Ty, unsigned AS,
+                               Instruction *I = nullptr) const override;
 
     /// getScalingFactorCost - Return the cost of the scaling used in
     /// addressing mode represented by AM.
diff --git a/lib/Target/AVR/AVRISelLowering.cpp b/lib/Target/AVR/AVRISelLowering.cpp
index 7d3faac1dcc..ab49cf9de89 100644
--- a/lib/Target/AVR/AVRISelLowering.cpp
+++ b/lib/Target/AVR/AVRISelLowering.cpp
@@ -724,7 +724,7 @@ void AVRTargetLowering::ReplaceNodeResults(SDNode *N,
 /// by AM is legal for this target, for a load/store of the specified type.
 bool AVRTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                               const AddrMode &AM, Type *Ty,
-                                              unsigned AS) const {
+                                              unsigned AS, Instruction *I) const {
   int64_t Offs = AM.BaseOffs;
 
   // Allow absolute addresses.
diff --git a/lib/Target/AVR/AVRISelLowering.h b/lib/Target/AVR/AVRISelLowering.h
index b44c62a21ac..37e01dc950f 100644
--- a/lib/Target/AVR/AVRISelLowering.h
+++ b/lib/Target/AVR/AVRISelLowering.h
@@ -83,7 +83,8 @@ public:
                           SelectionDAG &DAG) const override;
 
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
-                             unsigned AS) const override;
+                             unsigned AS,
+                             Instruction *I = nullptr) const override;
 
   bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset,
                                  ISD::MemIndexedMode &AM,
diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp
index 3997702bc96..1291af86c7e 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -2993,7 +2993,7 @@ bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
 /// AM is legal for this target, for a load/store of the specified type.
 bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                   const AddrMode &AM, Type *Ty,
-                                                  unsigned AS) const {
+                                                  unsigned AS, Instruction *I) const {
   if (Ty->isSized()) {
     // When LSR detects uses of the same base address to access different
     // types (e.g. unions), it will assume a conservative type for these
diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h
index d66cbc95e91..1b1462cce5a 100644
--- a/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/lib/Target/Hexagon/HexagonISelLowering.h
@@ -231,7 +231,8 @@ namespace HexagonISD {
     /// mode is legal for a load/store of any legal type.
     /// TODO: Handle pre/postinc as well.
     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
-                               Type *Ty, unsigned AS) const override;
+                               Type *Ty, unsigned AS,
+                               Instruction *I = nullptr) const override;
     /// Return true if folding a constant offset with the given GlobalAddress
     /// is legal. It is frequently not legal in PIC relocation models.
     bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 8e66a59c718..c4077a96a1d 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -3991,7 +3991,7 @@ void MipsTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
 
 bool MipsTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
-                                               unsigned AS) const {
+                                               unsigned AS, Instruction *I) const {
   // No global is ever allowed as a base.
   if (AM.BaseGV)
     return false;
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index 7566e1d76bf..56a9807fc23 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -625,7 +625,8 @@ namespace llvm {
     }
 
     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
-                               Type *Ty, unsigned AS) const override;
+                               Type *Ty, unsigned AS,
+                               Instruction *I = nullptr) const override;
 
     bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
 
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index d939fe5c09b..70070c259c6 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -3805,7 +3805,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
 /// (CodeGenPrepare.cpp)
 bool NVPTXTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                 const AddrMode &AM, Type *Ty,
-                                                unsigned AS) const {
+                                                unsigned AS, Instruction *I) const {
   // AddrMode - This represents an addressing mode of:
   //    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
   //
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h
index 9d7b70d80c1..cbe2dba3e66 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -456,7 +456,8 @@ public:
   /// reduction (LoopStrengthReduce.cpp) and memory optimization for
   /// address mode (CodeGenPrepare.cpp)
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
-                             unsigned AS) const override;
+                             unsigned AS,
+                             Instruction *I = nullptr) const override;
 
   bool isTruncateFree(Type *SrcTy, Type *DstTy) const override {
     // Truncating 64-bit to 32-bit is free in SASS.
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index b3a3c73f6df..bf3430524bf 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -12810,7 +12810,7 @@ void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
 // by AM is legal for this target, for a load/store of the specified type.
 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                               const AddrMode &AM, Type *Ty,
-                                              unsigned AS) const {
+                                              unsigned AS, Instruction *I) const {
   // PPC does not allow r+i addressing modes for vectors!
   if (Ty->isVectorTy() && AM.BaseOffs != 0)
     return false;
diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h
index 49d7d8220af..e52e85ea765 100644
--- a/lib/Target/PowerPC/PPCISelLowering.h
+++ b/lib/Target/PowerPC/PPCISelLowering.h
@@ -727,7 +727,8 @@ namespace llvm {
     /// isLegalAddressingMode - Return true if the addressing mode represented
     /// by AM is legal for this target, for a load/store of the specified type.
     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
-                               Type *Ty, unsigned AS) const override;
+                               Type *Ty, unsigned AS,
+                               Instruction *I = nullptr) const override;
 
     /// isLegalICmpImmediate - Return true if the specified immediate is legal
     /// icmp immediate, that is the target has icmp instructions which can
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 2d916d2e152..c0e686e1370 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -586,9 +586,107 @@ bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
   return true;
 }
 
+// Information about the addressing mode for a memory access.
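+// On SystemZ a "short" displacement is 12 bits unsigned and a "long"
+// displacement is 20 bits signed; e.g. MVC only accepts a short displacement
+// and no index register, which is what these two fields let us express.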
+struct AddressingMode {
+  // True if a long displacement is supported.
+  bool LongDisplacement;
+
+  // True if use of index register is supported.
+  bool IndexReg;
+
+  AddressingMode(bool LongDispl, bool IdxReg) :
+    LongDisplacement(LongDispl), IndexReg(IdxReg) {}
+};
+
+// Return the desired addressing mode for a Load which has only one use (in
+// the same block) which is a Store.
+static AddressingMode getLoadStoreAddrMode(bool HasVector,
+                                           Type *Ty) {
+  // With vector support a Load->Store combination may be combined to either
+  // an MVC or vector operations and it seems to work best to allow the
+  // vector addressing mode.
+  if (HasVector)
+    return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
+
+  // Otherwise only the MVC case is special.
+  bool MVC = Ty->isIntegerTy(8);
+  return AddressingMode(!MVC/*LongDispl*/, !MVC/*IdxReg*/);
+}
+
+// Return the addressing mode which seems most desirable given an LLVM
+// Instruction pointer.
+static AddressingMode
+supportedAddressingMode(Instruction *I, bool HasVector) {
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+    switch (II->getIntrinsicID()) {
+    default: break;
+    case Intrinsic::memset:
+    case Intrinsic::memmove:
+    case Intrinsic::memcpy:
+      return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
+    }
+  }
+
+  if (isa<LoadInst>(I) && I->hasOneUse()) {
+    auto *SingleUser = dyn_cast<Instruction>(*I->user_begin());
+    if (SingleUser->getParent() == I->getParent()) {
+      if (isa<ICmpInst>(SingleUser)) {
+        if (auto *C = dyn_cast<ConstantInt>(SingleUser->getOperand(1)))
+          if (isInt<16>(C->getSExtValue()) || isUInt<16>(C->getZExtValue()))
+            // Comparison of memory with 16 bit signed / unsigned immediate
+            return AddressingMode(false/*LongDispl*/, false/*IdxReg*/);
+      } else if (isa<StoreInst>(SingleUser))
+        // Load->Store
+        return getLoadStoreAddrMode(HasVector, I->getType());
+    }
+  } else if (auto *StoreI = dyn_cast<StoreInst>(I)) {
+    if (auto *LoadI = dyn_cast<LoadInst>(StoreI->getValueOperand()))
+      if (LoadI->hasOneUse() && LoadI->getParent() == I->getParent())
+        // Load->Store
+        return getLoadStoreAddrMode(HasVector, LoadI->getType());
+  }
+
+  if (HasVector && (isa<LoadInst>(I) || isa<StoreInst>(I))) {
+
+    // * Use LDE instead of LE/LEY for z13 to avoid partial register
+    //   dependencies (LDE only supports small offsets).
+    // * Utilize the vector registers to hold floating point
+    //   values (vector load / store instructions only support small
+    //   offsets).
+
+    Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
+                         I->getOperand(0)->getType());
+    bool IsFPAccess = MemAccessTy->isFloatingPointTy();
+    bool IsVectorAccess = MemAccessTy->isVectorTy();
+
+    // A store of an extracted vector element will be combined into a VSTE type
+    // instruction.
+    if (!IsVectorAccess && isa<StoreInst>(I)) {
+      Value *DataOp = I->getOperand(0);
+      if (isa<ExtractElementInst>(DataOp))
+        IsVectorAccess = true;
+    }
+
+    // A load which gets inserted into a vector element will be combined into a
+    // VLE type instruction.
+    if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
+      User *LoadUser = *I->user_begin();
+      if (isa<InsertElementInst>(LoadUser))
+        IsVectorAccess = true;
+    }
+
+    if (IsFPAccess || IsVectorAccess)
+      return AddressingMode(false/*LongDispl*/, true/*IdxReg*/);
+  }
+
+  return AddressingMode(true/*LongDispl*/, true/*IdxReg*/);
+}
+
+// TODO: This method should also check for the displacement when *I is
+// passed. It may also be possible to merge with isFoldableMemAccessOffset()
+// now that both methods get the *I.
 bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
-                                                  const AddrMode &AM, Type *Ty,
-                                                  unsigned AS) const {
+       const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I) const {
   // Punt on globals for now, although they can be used in limited
   // RELATIVE LONG cases.
   if (AM.BaseGV)
@@ -598,46 +696,20 @@ bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
   if (!isInt<20>(AM.BaseOffs))
     return false;
 
-  // Indexing is OK but no scale factor can be applied.
-  return AM.Scale == 0 || AM.Scale == 1;
+  if (I != nullptr &&
+      !supportedAddressingMode(I, Subtarget.hasVector()).IndexReg)
+    // No indexing allowed.
+    return AM.Scale == 0;
+  else
+    // Indexing is OK but no scale factor can be applied.
+    return AM.Scale == 0 || AM.Scale == 1;
 }
 
+// TODO: Should we check for isInt<20> also?
 bool SystemZTargetLowering::isFoldableMemAccessOffset(Instruction *I,
                                                       int64_t Offset) const {
-  // This only applies to z13.
-  if (!Subtarget.hasVector())
-    return true;
-
-  // * Use LDE instead of LE/LEY to avoid partial register
-  //   dependencies (LDE only supports small offsets).
-  // * Utilize the vector registers to hold floating point
-  //   values (vector load / store instructions only support small
-  //   offsets).
-
-  assert (isa<LoadInst>(I) || isa<StoreInst>(I));
-  Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
-                       I->getOperand(0)->getType());
-  bool IsFPAccess = MemAccessTy->isFloatingPointTy();
-  bool IsVectorAccess = MemAccessTy->isVectorTy();
-
-  // A store of an extracted vector element will be combined into a VSTE type
-  // instruction.
-  if (!IsVectorAccess && isa<StoreInst>(I)) {
-    Value *DataOp = I->getOperand(0);
-    if (isa<ExtractElementInst>(DataOp))
-      IsVectorAccess = true;
-  }
-
-  // A load which gets inserted into a vector element will be combined into a
-  // VLE type instruction.
-  if (!IsVectorAccess && isa<LoadInst>(I) && I->hasOneUse()) {
-    User *LoadUser = *I->user_begin();
-    if (isa<InsertElementInst>(LoadUser))
-      IsVectorAccess = true;
-  }
-
-  if (!isUInt<12>(Offset) && (IsFPAccess || IsVectorAccess))
-    return false;
+  if (!supportedAddressingMode(I, Subtarget.hasVector()).LongDisplacement)
+    return (isUInt<12>(Offset));
   return true;
 }
diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h
index abe8b7233e6..a59f507477d 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/lib/Target/SystemZ/SystemZISelLowering.h
@@ -384,7 +384,8 @@ public:
   bool isLegalICmpImmediate(int64_t Imm) const override;
   bool isLegalAddImmediate(int64_t Imm) const override;
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
-                             unsigned AS) const override;
+                             unsigned AS,
+                             Instruction *I = nullptr) const override;
   bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) const override;
   bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
                                       unsigned Align,
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 506dc742799..a4d9421e08a 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -292,6 +292,19 @@ void SystemZTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
     UP.Force = true;
 }
 
+
+bool SystemZTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+                                   TargetTransformInfo::LSRCost &C2) {
+  // SystemZ specific: check instruction count (first), and don't care about
+  // ImmCost, since offsets are checked explicitly.
+  return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost,
+                  C1.NumIVMuls, C1.NumBaseAdds,
+                  C1.ScaleCost, C1.SetupCost) <
+    std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost,
+             C2.NumIVMuls, C2.NumBaseAdds,
+             C2.ScaleCost, C2.SetupCost);
+}
+
 unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) {
   if (!Vector)
     // Discount the stack pointer. Also leave out %r0, since it can't
diff --git a/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index a0c6fa94f8c..28821a2ca11 100644
--- a/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -48,6 +48,8 @@ public:
 
   void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                TTI::UnrollingPreferences &UP);
+  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
+                     TargetTransformInfo::LSRCost &C2);
   /// @}
 
   /// \name Vector TTI Implementations
@@ -61,6 +63,7 @@ public:
   unsigned getMinPrefetchStride() { return 2048; }
 
   bool prefersVectorizedAddressing() { return false; }
+  bool LSRWithInstrQueries() { return true; }
   bool supportsEfficientVectorElementLoadStore() { return true; }
   bool enableInterleavedAccessVectorization() { return true; }
 
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 814377003cb..21df22c7a6d 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -233,7 +233,8 @@ bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz() const {
 
 bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                       const AddrMode &AM,
                                                       Type *Ty,
-                                                      unsigned AS) const {
+                                                      unsigned AS,
+                                                      Instruction *I) const {
   // WebAssembly offsets are added as unsigned without wrapping. The
   // isLegalAddressingMode gives us no way to determine if wrapping could be
   // happening, so we approximate this by accepting only non-negative offsets.
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/lib/Target/WebAssembly/WebAssemblyISelLowering.h
index 99d3d0d558f..a998ff2b5b5 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelLowering.h
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.h
@@ -55,7 +55,8 @@ class WebAssemblyTargetLowering final : public TargetLowering {
   bool isCheapToSpeculateCttz() const override;
   bool isCheapToSpeculateCtlz() const override;
   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
-                             unsigned AS) const override;
+                             unsigned AS,
+                             Instruction *I = nullptr) const override;
   bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace, unsigned Align,
                                       bool *Fast) const override;
   bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 57a40f86782..697210b95be 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -24757,7 +24757,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
 /// target, for a load/store of the specified type.
 bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                               const AddrMode &AM, Type *Ty,
-                                              unsigned AS) const {
+                                              unsigned AS,
+                                              Instruction *I) const {
   // X86 supports extremely general addressing modes.
   CodeModel::Model M = getTargetMachine().getCodeModel();
 
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index dbbc2bbba6a..fe0327852d0 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -903,7 +903,8 @@ namespace llvm {
     /// Return true if the addressing mode represented
     /// by AM is legal for this target, for a load/store of the specified type.
     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
-                               Type *Ty, unsigned AS) const override;
+                               Type *Ty, unsigned AS,
+                               Instruction *I = nullptr) const override;
 
     /// Return true if the specified immediate is legal
     /// icmp immediate, that is the target has icmp instructions which can
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 1da189c5cd3..f9ead4e67d7 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -1889,7 +1889,8 @@ static inline bool isImmUs4(int64_t val)
 /// by AM is legal for this target, for a load/store of the specified type.
 bool XCoreTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                 const AddrMode &AM, Type *Ty,
-                                                unsigned AS) const {
+                                                unsigned AS,
+                                                Instruction *I) const {
   if (Ty->getTypeID() == Type::VoidTyID)
     return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs);
 
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 452d5b04605..cb2b4161d3a 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -123,7 +123,8 @@ namespace llvm {
                                  MachineBasicBlock *MBB) const override;
 
     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
-                               Type *Ty, unsigned AS) const override;
+                               Type *Ty, unsigned AS,
+                               Instruction *I = nullptr) const override;
 
     /// If a physical register, this returns the register that receives the
     /// exception address on entry to an EH pad.
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 3638da118cb..5c8f7e0870f 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -783,10 +783,17 @@ static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
       // of intrinsics.
       switch (II->getIntrinsicID()) {
         default: break;
+        case Intrinsic::memset:
         case Intrinsic::prefetch:
           if (II->getArgOperand(0) == OperandVal)
             isAddress = true;
           break;
+        case Intrinsic::memmove:
+        case Intrinsic::memcpy:
+          if (II->getArgOperand(0) == OperandVal ||
+              II->getArgOperand(1) == OperandVal)
+            isAddress = true;
+          break;
       }
   } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
     if (RMW->getPointerOperand() == OperandVal)
@@ -1280,7 +1287,7 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
 
     // Check with target if this offset with this instruction is
     // specifically not supported.
-    if ((isa<LoadInst>(Fixup.UserInst) || isa<StoreInst>(Fixup.UserInst)) &&
+    if (LU.Kind == LSRUse::Address && Offset != 0 &&
         !TTI.isFoldableMemAccessOffset(Fixup.UserInst, Offset))
       C.NumBaseAdds++;
   }
@@ -1535,11 +1542,12 @@ LLVM_DUMP_METHOD void LSRUse::dump() const {
 static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                  LSRUse::KindType Kind, MemAccessTy AccessTy,
                                  GlobalValue *BaseGV, int64_t BaseOffset,
-                                 bool HasBaseReg, int64_t Scale) {
+                                 bool HasBaseReg, int64_t Scale,
+                                 Instruction *Fixup = nullptr) {
   switch (Kind) {
   case LSRUse::Address:
     return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, BaseOffset,
-                                     HasBaseReg, Scale, AccessTy.AddrSpace);
+                                     HasBaseReg, Scale, AccessTy.AddrSpace, Fixup);
 
   case LSRUse::ICmpZero:
     // There's not even a target hook for querying whether it would be legal to
@@ -1645,6 +1653,16 @@ static bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset,
 
 static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
                                  const LSRUse &LU, const Formula &F) {
+  // Target may want to look at the user instructions.
+  if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {
+    for (const LSRFixup &Fixup : LU.Fixups)
+      if (!isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
+                                F.BaseOffset, F.HasBaseReg, F.Scale,
+                                Fixup.UserInst))
+        return false;
+    return true;
+  }
+
   return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
                               LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
                               F.Scale);
diff --git a/test/CodeGen/SystemZ/dag-combine-01.ll b/test/CodeGen/SystemZ/dag-combine-01.ll
index a56a118dada..019421cfdfe 100644
--- a/test/CodeGen/SystemZ/dag-combine-01.ll
+++ b/test/CodeGen/SystemZ/dag-combine-01.ll
@@ -40,7 +40,7 @@ for.body.3.lr.ph.i:                               ; preds = %for.body.3.lr.ph.i.
 for.body.3.i:                                     ; preds = %for.body.3.i, %for.body.3.lr.ph.i
 ; CHECK-LABEL: .LBB0_5:
 ; CHECK-NOT: stfh %r{{.*}}, 0(%r{{.*}})
-; CHECK: lg %r{{.*}}, -4(%r{{.*}})
+; CHECK: lg %r{{.*}}, 8(%r{{.*}})
 ; Overlapping load should go before the store
   %indvars.iv.i = phi i64 [ 0, %for.body.3.lr.ph.i ], [ %indvars.iv.next.i, %for.body.3.i ]
   %3 = shl nsw i64 %indvars.iv.i, 6
diff --git a/test/CodeGen/SystemZ/loop-01.ll b/test/CodeGen/SystemZ/loop-01.ll
index 321be4b8e62..79afc7f4198 100644
--- a/test/CodeGen/SystemZ/loop-01.ll
+++ b/test/CodeGen/SystemZ/loop-01.ll
@@ -9,7 +9,7 @@ define void @f1(i32 *%dest, i32 %a) {
 ; CHECK-LABEL: f1:
 ; CHECK-NOT: sllg
-; CHECK: st %r3, 0({{%r[1-5],%r[1-5]}})
+; CHECK: st %r3, 400({{%r[1-5],%r[1-5]}})
 ; CHECK: br %r14
 entry:
   br label %loop
@@ -239,3 +239,84 @@ for.body:                                         ; preds = %for.body.preheader, %for.body
   %exitcond = icmp eq i32 %lftr.wideiv, %S
   br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
 }
+
+; Test that a memcpy loop does not get a lot of lays before each mvc (D12 and no index-reg).
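+; Without the instruction-based isLegalAddressingMode() queries, LSR picks
+; formulae whose addresses MVC cannot encode (long displacement or index
+; register), and a LAY is then needed to materialize each address first.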
+%0 = type { %1, %2* }
+%1 = type { %2*, %2* }
+%2 = type <{ %3, i32, [4 x i8] }>
+%3 = type { i16*, i16*, i16* }
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #0
+
+define void @f8() {
+; CHECK-Z13-LABEL: f8:
+; CHECK-Z13: mvc
+; CHECK-Z13-NEXT: mvc
+; CHECK-Z13-NEXT: mvc
+; CHECK-Z13-NEXT: mvc
+
+bb:
+  %tmp = load %0*, %0** undef, align 8
+  br i1 undef, label %bb2, label %bb1
+
+bb1:                                              ; preds = %bb
+  br label %bb2
+
+bb2:                                              ; preds = %bb1, %bb
+  %tmp3 = phi %0* [ %tmp, %bb ], [ undef, %bb1 ]
+  %tmp4 = phi %0* [ undef, %bb ], [ undef, %bb1 ]
+  br label %bb5
+
+bb5:                                              ; preds = %bb5, %bb2
+  %tmp6 = phi %0* [ %tmp21, %bb5 ], [ %tmp3, %bb2 ]
+  %tmp7 = phi %0* [ %tmp20, %bb5 ], [ %tmp4, %bb2 ]
+  %tmp8 = getelementptr inbounds %0, %0* %tmp7, i64 -1
+  %tmp9 = getelementptr inbounds %0, %0* %tmp6, i64 -1
+  %tmp10 = bitcast %0* %tmp9 to i8*
+  %tmp11 = bitcast %0* %tmp8 to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp10, i8* %tmp11, i64 24, i32 8, i1 false)
+  %tmp12 = getelementptr inbounds %0, %0* %tmp7, i64 -2
+  %tmp13 = getelementptr inbounds %0, %0* %tmp6, i64 -2
+  %tmp14 = bitcast %0* %tmp13 to i8*
+  %tmp15 = bitcast %0* %tmp12 to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp14, i8* %tmp15, i64 24, i32 8, i1 false)
+  %tmp16 = getelementptr inbounds %0, %0* %tmp7, i64 -3
+  %tmp17 = getelementptr inbounds %0, %0* %tmp6, i64 -3
+  %tmp18 = bitcast %0* %tmp17 to i8*
+  %tmp19 = bitcast %0* %tmp16 to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp18, i8* %tmp19, i64 24, i32 8, i1 false)
+  %tmp20 = getelementptr inbounds %0, %0* %tmp7, i64 -4
+  %tmp21 = getelementptr inbounds %0, %0* %tmp6, i64 -4
+  %tmp22 = bitcast %0* %tmp21 to i8*
+  %tmp23 = bitcast %0* %tmp20 to i8*
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp22, i8* %tmp23, i64 24, i32 8, i1 false)
+  br label %bb5
+}
+
+; Test that a chsi does not need an aghik inside the loop (no index reg)
+define void @f9() {
+; CHECK-Z13-LABEL: f9:
+; CHECK-Z13: # =>This Inner Loop Header: Depth=1
+; CHECK-Z13-NOT: aghik
+; CHECK-Z13: chsi
+
+entry:
+  br label %for.body.i63
+
+for.body.i63:                                     ; preds = %for.inc.i, %entry
+  %indvars.iv155.i = phi i64 [ 0, %entry ], [ %indvars.iv.next156.i.3, %for.inc.i ]
+  %arrayidx.i62 = getelementptr inbounds i32, i32* undef, i64 %indvars.iv155.i
+  %tmp = load i32, i32* %arrayidx.i62, align 4
+  %cmp9.i = icmp eq i32 %tmp, 0
+  br i1 %cmp9.i, label %for.inc.i, label %if.then10.i
+
+if.then10.i:                                      ; preds = %for.body.i63
+  unreachable
+
+for.inc.i:                                        ; preds = %for.body.i63
+  %indvars.iv.next156.i = or i64 %indvars.iv155.i, 1
+  %arrayidx.i62.1 = getelementptr inbounds i32, i32* undef, i64 %indvars.iv.next156.i
+  %tmp1 = load i32, i32* %arrayidx.i62.1, align 4
+  %indvars.iv.next156.i.3 = add nsw i64 %indvars.iv155.i, 4
+  br label %for.body.i63
+}
-- 
2.40.0
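
For reference, the lexicographic tie-breaking in SystemZTTIImpl::isLSRCostLess()
above can be exercised with a small standalone program. This is a sketch, not
part of the patch: the LSRCost struct below is a local stand-in for only the
compared fields of TargetTransformInfo::LSRCost, and the sample values are
made up.

    // Mirrors the SystemZ ordering: Insns is compared first, and ImmCost is
    // deliberately left out of the comparison.
    #include <cstdio>
    #include <tuple>

    struct LSRCost { // local stand-in for TargetTransformInfo::LSRCost
      unsigned Insns, NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ImmCost,
          ScaleCost, SetupCost;
    };

    static bool isLSRCostLess(const LSRCost &C1, const LSRCost &C2) {
      return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, C1.NumIVMuls,
                      C1.NumBaseAdds, C1.ScaleCost, C1.SetupCost) <
             std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, C2.NumIVMuls,
                      C2.NumBaseAdds, C2.ScaleCost, C2.SetupCost);
    }

    int main() {
      // C1 needs fewer instructions but has a higher immediate cost; on the
      // SystemZ ordering it still wins, since offsets are legality-checked
      // separately via isLegalAddressingMode()/isFoldableMemAccessOffset().
      LSRCost C1 = {4, 3, 1, 0, 0, /*ImmCost=*/9, 0, 0};
      LSRCost C2 = {5, 2, 1, 0, 0, /*ImmCost=*/0, 0, 0};
      std::printf("C1 less than C2: %s\n", isLSRCostLess(C1, C2) ? "yes" : "no");
      return 0;
    }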