From: Yaxun Liu
Date: Mon, 27 Mar 2017 14:04:01 +0000 (+0000)
Subject: [AMDGPU] Get address space mapping by target triple environment
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=ab3be33d40cede6c9e610c05d07b057254f57317;p=llvm

[AMDGPU] Get address space mapping by target triple environment

As we introduced the target triple environments amdgiz and amdgizcl, the address space values are no longer enums. We have to decide the values by the target triple.

The basic idea is to use struct AMDGPUAS to represent the address space values. For address space values which do not depend on the target triple, use static const members, so that they don't occupy extra memory space and are equivalent to compile-time constants. Since the struct is lightweight and cheap, it can be created on the fly at the point of usage, or it can be added as a member of a pass and created at the beginning of its run* function.

Differential Revision: https://reviews.llvm.org/D31284

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298846 91177308-0d34-0410-b5e6-96231b3b80d8
---
diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h index 99d71f50374..4e2f0af5a20 100644 --- a/lib/Target/AMDGPU/AMDGPU.h +++ b/lib/Target/AMDGPU/AMDGPU.h @@ -23,6 +23,7 @@ class Pass; class Target; class TargetMachine; class PassRegistry; +class Module; // R600 Passes FunctionPass *createR600VectorRegMerger(TargetMachine &tm); @@ -150,43 +151,53 @@ enum TargetIndex { /// however on the GPU, each address space points to /// a separate piece of memory that is unique from other /// memory locations. -namespace AMDGPUAS { -enum AddressSpaces : unsigned { - PRIVATE_ADDRESS = 0, ///< Address space for private memory. - GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). - CONSTANT_ADDRESS = 2, ///< Address space for constant memory (VTX2) - LOCAL_ADDRESS = 3, ///< Address space for local memory. - FLAT_ADDRESS = 4, ///< Address space for flat memory. - REGION_ADDRESS = 5, ///< Address space for region memory. - PARAM_D_ADDRESS = 6, ///< Address space for direct addressible parameter memory (CONST0) - PARAM_I_ADDRESS = 7, ///< Address space for indirect addressible parameter memory (VTX1) +struct AMDGPUAS { + // The following address space values depend on the triple environment. + unsigned PRIVATE_ADDRESS; ///< Address space for private memory. + unsigned CONSTANT_ADDRESS; ///< Address space for constant memory (VTX2) + unsigned FLAT_ADDRESS; ///< Address space for flat memory. + unsigned REGION_ADDRESS; ///< Address space for region memory. + + // The maximum value for flat, generic, local, private, constant and region. + const static unsigned MAX_COMMON_ADDRESS = 5; + + const static unsigned GLOBAL_ADDRESS = 1; ///< Address space for global memory (RAT0, VTX0). + const static unsigned LOCAL_ADDRESS = 3; ///< Address space for local memory. + const static unsigned PARAM_D_ADDRESS = 6; ///< Address space for direct addressible parameter memory (CONST0) + const static unsigned PARAM_I_ADDRESS = 7; ///< Address space for indirect addressible parameter memory (VTX1) // Do not re-order the CONSTANT_BUFFER_* enums.
Several places depend on this // order to be able to dynamically index a constant buffer, for example: // // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx - CONSTANT_BUFFER_0 = 8, - CONSTANT_BUFFER_1 = 9, - CONSTANT_BUFFER_2 = 10, - CONSTANT_BUFFER_3 = 11, - CONSTANT_BUFFER_4 = 12, - CONSTANT_BUFFER_5 = 13, - CONSTANT_BUFFER_6 = 14, - CONSTANT_BUFFER_7 = 15, - CONSTANT_BUFFER_8 = 16, - CONSTANT_BUFFER_9 = 17, - CONSTANT_BUFFER_10 = 18, - CONSTANT_BUFFER_11 = 19, - CONSTANT_BUFFER_12 = 20, - CONSTANT_BUFFER_13 = 21, - CONSTANT_BUFFER_14 = 22, - CONSTANT_BUFFER_15 = 23, + const static unsigned CONSTANT_BUFFER_0 = 8; + const static unsigned CONSTANT_BUFFER_1 = 9; + const static unsigned CONSTANT_BUFFER_2 = 10; + const static unsigned CONSTANT_BUFFER_3 = 11; + const static unsigned CONSTANT_BUFFER_4 = 12; + const static unsigned CONSTANT_BUFFER_5 = 13; + const static unsigned CONSTANT_BUFFER_6 = 14; + const static unsigned CONSTANT_BUFFER_7 = 15; + const static unsigned CONSTANT_BUFFER_8 = 16; + const static unsigned CONSTANT_BUFFER_9 = 17; + const static unsigned CONSTANT_BUFFER_10 = 18; + const static unsigned CONSTANT_BUFFER_11 = 19; + const static unsigned CONSTANT_BUFFER_12 = 20; + const static unsigned CONSTANT_BUFFER_13 = 21; + const static unsigned CONSTANT_BUFFER_14 = 22; + const static unsigned CONSTANT_BUFFER_15 = 23; // Some places use this if the address space can't be determined. - UNKNOWN_ADDRESS_SPACE = ~0u + const static unsigned UNKNOWN_ADDRESS_SPACE = ~0u; }; -} // namespace AMDGPUAS +namespace llvm { +namespace AMDGPU { +AMDGPUAS getAMDGPUAS(const Module &M); +AMDGPUAS getAMDGPUAS(const TargetMachine &TM); +AMDGPUAS getAMDGPUAS(Triple T); +} // namespace AMDGPU +} // namespace llvm #endif diff --git a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp index 127b2639794..aa5ebae2d9f 100644 --- a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp +++ b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.cpp @@ -37,26 +37,60 @@ void AMDGPUAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); } +// Must match the table in getAliasResult. +AMDGPUAAResult::ASAliasRulesTy::ASAliasRulesTy(AMDGPUAS AS_) : AS(AS_) { + // These arrarys are indexed by address space value + // enum elements 0 ... 
to 5 + static const AliasResult ASAliasRulesPrivIsZero[6][6] = { + /* Private Global Constant Group Flat Region*/ + /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias}, + /* Global */ {NoAlias , MayAlias, NoAlias , NoAlias , MayAlias, NoAlias}, + /* Constant */ {NoAlias , NoAlias , MayAlias, NoAlias , MayAlias, NoAlias}, + /* Group */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias, NoAlias}, + /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias}, + /* Region */ {NoAlias , NoAlias , NoAlias , NoAlias , MayAlias, MayAlias} + }; + static const AliasResult ASAliasRulesGenIsZero[6][6] = { + /* Flat Global Region Group Constant Private */ + /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias}, + /* Global */ {MayAlias, MayAlias, NoAlias , NoAlias , NoAlias , NoAlias}, + /* Region */ {NoAlias , NoAlias , MayAlias, NoAlias, NoAlias , MayAlias}, + /* Group */ {MayAlias, NoAlias , NoAlias , MayAlias, NoAlias , NoAlias}, + /* Constant */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias, NoAlias}, + /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , NoAlias , MayAlias} + }; + assert(AS.MAX_COMMON_ADDRESS <= 5); + if (AS.FLAT_ADDRESS == 0) { + assert(AS.GLOBAL_ADDRESS == 1 && + AS.REGION_ADDRESS == 2 && + AS.LOCAL_ADDRESS == 3 && + AS.CONSTANT_ADDRESS == 4 && + AS.PRIVATE_ADDRESS == 5); + ASAliasRules = &ASAliasRulesGenIsZero; + } else { + assert(AS.PRIVATE_ADDRESS == 0 && + AS.GLOBAL_ADDRESS == 1 && + AS.CONSTANT_ADDRESS == 2 && + AS.LOCAL_ADDRESS == 3 && + AS.FLAT_ADDRESS == 4 && + AS.REGION_ADDRESS == 5); + ASAliasRules = &ASAliasRulesPrivIsZero; + } +} + +AliasResult AMDGPUAAResult::ASAliasRulesTy::getAliasResult(unsigned AS1, + unsigned AS2) const { + if (AS1 > AS.MAX_COMMON_ADDRESS || AS2 > AS.MAX_COMMON_ADDRESS) + report_fatal_error("Pointer address space out of range"); + return (*ASAliasRules)[AS1][AS2]; +} + AliasResult AMDGPUAAResult::alias(const MemoryLocation &LocA, const MemoryLocation &LocB) { - // This array is indexed by the AMDGPUAS::AddressSpaces - // enum elements PRIVATE_ADDRESS ... 
to FLAT_ADDRESS - // see "llvm/Transforms/AMDSPIRUtils.h" - static const AliasResult ASAliasRules[5][5] = { - /* Private Global Constant Group Flat */ - /* Private */ {MayAlias, NoAlias , NoAlias , NoAlias , MayAlias}, - /* Global */ {NoAlias , MayAlias, NoAlias , NoAlias , MayAlias}, - /* Constant */ {NoAlias , NoAlias , MayAlias, NoAlias , MayAlias}, - /* Group */ {NoAlias , NoAlias , NoAlias , MayAlias, MayAlias}, - /* Flat */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias} - }; unsigned asA = LocA.Ptr->getType()->getPointerAddressSpace(); unsigned asB = LocB.Ptr->getType()->getPointerAddressSpace(); - if (asA > AMDGPUAS::AddressSpaces::FLAT_ADDRESS || - asB > AMDGPUAS::AddressSpaces::FLAT_ADDRESS) - report_fatal_error("Pointer address space out of range"); - AliasResult Result = ASAliasRules[asA][asB]; + AliasResult Result = ASAliasRules.getAliasResult(asA, asB); if (Result == NoAlias) return Result; if (isa(LocA.Ptr) && isa(LocB.Ptr)) { @@ -75,8 +109,7 @@ bool AMDGPUAAResult::pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal) { const Value *Base = GetUnderlyingObject(Loc.Ptr, DL); - if (Base->getType()->getPointerAddressSpace() == - AMDGPUAS::AddressSpaces::CONSTANT_ADDRESS) { + if (Base->getType()->getPointerAddressSpace() == AS.CONSTANT_ADDRESS) { return true; } diff --git a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h index 943b4a68b25..f73aa47cb93 100644 --- a/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h +++ b/lib/Target/AMDGPU/AMDGPUAliasAnalysis.h @@ -13,6 +13,7 @@ #ifndef LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H #define LLVM_ANALYSIS_AMDGPUALIASANALYSIS_H +#include "AMDGPU.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/Function.h" #include "llvm/IR/Module.h" @@ -25,11 +26,14 @@ class AMDGPUAAResult : public AAResultBase { friend AAResultBase; const DataLayout &DL; + AMDGPUAS AS; public: - explicit AMDGPUAAResult(const DataLayout &DL) : AAResultBase(), DL(DL) {} + explicit AMDGPUAAResult(const DataLayout &DL, Triple T) : AAResultBase(), + DL(DL), AS(AMDGPU::getAMDGPUAS(T)), ASAliasRules(AS) {} AMDGPUAAResult(AMDGPUAAResult &&Arg) - : AAResultBase(std::move(Arg)), DL(Arg.DL){} + : AAResultBase(std::move(Arg)), DL(Arg.DL), AS(Arg.AS), + ASAliasRules(Arg.ASAliasRules){} /// Handle invalidation events from the new pass manager. /// @@ -42,6 +46,15 @@ public: private: bool Aliases(const MDNode *A, const MDNode *B) const; bool PathAliases(const MDNode *A, const MDNode *B) const; + + class ASAliasRulesTy { + public: + ASAliasRulesTy(AMDGPUAS AS_); + AliasResult getAliasResult(unsigned AS1, unsigned AS2) const; + private: + AMDGPUAS AS; + const AliasResult (*ASAliasRules)[6][6]; + } ASAliasRules; }; /// Analysis pass providing a never-invalidated alias analysis result. 
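The new ASAliasRulesTy above keeps two 6x6 alias tables and picks one at construction time based on which numbering the triple selects (flat/generic as address space 0 for the new environments, or the legacy layout with private as 0). Below is a minimal, self-contained sketch of that selection plus the bounds-checked lookup; the table contents are copied from the hunk above, but ASMap and ASAliasRules are simplified stand-ins, not the LLVM classes.

#include <cassert>

enum AliasResult { NoAlias, MayAlias };

struct ASMap {                       // simplified stand-in for AMDGPUAS
  unsigned PRIVATE_ADDRESS, CONSTANT_ADDRESS, FLAT_ADDRESS, REGION_ADDRESS;
  static const unsigned GLOBAL_ADDRESS = 1;
  static const unsigned LOCAL_ADDRESS = 3;
  static const unsigned MAX_COMMON_ADDRESS = 5;
};

class ASAliasRules {
public:
  explicit ASAliasRules(const ASMap &AS) {
    // Table for the legacy numbering (private = 0, flat = 4).
    static const AliasResult PrivIsZero[6][6] = {
      /*             Private   Global    Constant  Group     Flat      Region  */
      /* Private  */ {MayAlias, NoAlias,  NoAlias,  NoAlias,  MayAlias, NoAlias},
      /* Global   */ {NoAlias,  MayAlias, NoAlias,  NoAlias,  MayAlias, NoAlias},
      /* Constant */ {NoAlias,  NoAlias,  MayAlias, NoAlias,  MayAlias, NoAlias},
      /* Group    */ {NoAlias,  NoAlias,  NoAlias,  MayAlias, MayAlias, NoAlias},
      /* Flat     */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
      /* Region   */ {NoAlias,  NoAlias,  NoAlias,  NoAlias,  MayAlias, MayAlias}};
    // Table for the amdgiz numbering (flat/generic = 0, private = 5).
    static const AliasResult GenIsZero[6][6] = {
      /*             Flat      Global    Region    Group     Constant  Private */
      /* Flat     */ {MayAlias, MayAlias, MayAlias, MayAlias, MayAlias, MayAlias},
      /* Global   */ {MayAlias, MayAlias, NoAlias,  NoAlias,  NoAlias,  NoAlias},
      /* Region   */ {NoAlias,  NoAlias,  MayAlias, NoAlias,  NoAlias,  MayAlias},
      /* Group    */ {MayAlias, NoAlias,  NoAlias,  MayAlias, NoAlias,  NoAlias},
      /* Constant */ {MayAlias, NoAlias,  NoAlias,  NoAlias,  MayAlias, NoAlias},
      /* Private  */ {MayAlias, NoAlias,  NoAlias,  NoAlias,  NoAlias,  MayAlias}};
    Rules = (AS.FLAT_ADDRESS == 0) ? &GenIsZero : &PrivIsZero;
  }

  AliasResult get(unsigned A, unsigned B) const {
    assert(A <= ASMap::MAX_COMMON_ADDRESS && B <= ASMap::MAX_COMMON_ADDRESS &&
           "pointer address space out of range");
    return (*Rules)[A][B];
  }

private:
  const AliasResult (*Rules)[6][6];
};

int main() {
  ASMap Legacy = {/*Private*/ 0, /*Constant*/ 2, /*Flat*/ 4, /*Region*/ 5};
  ASAliasRules Rules(Legacy);
  // Local (group) memory never aliases global memory in either numbering.
  return Rules.get(ASMap::LOCAL_ADDRESS, ASMap::GLOBAL_ADDRESS) == NoAlias ? 0 : 1;
}

In the real constructor the asserts pin down the complete numbering that each table expects, so a third layout would fail loudly instead of silently reading the wrong row.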
@@ -53,7 +66,8 @@ public: typedef AMDGPUAAResult Result; AMDGPUAAResult run(Function &F, AnalysisManager &AM) { - return AMDGPUAAResult(F.getParent()->getDataLayout()); + return AMDGPUAAResult(F.getParent()->getDataLayout(), + Triple(F.getParent()->getTargetTriple())); } }; @@ -72,7 +86,8 @@ public: const AMDGPUAAResult &getResult() const { return *Result; } bool doInitialization(Module &M) override { - Result.reset(new AMDGPUAAResult(M.getDataLayout())); + Result.reset(new AMDGPUAAResult(M.getDataLayout(), + Triple(M.getTargetTriple()))); return false; } bool doFinalization(Module &M) override { diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp index 09d3ff716e6..3d8db7cd8af 100644 --- a/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp +++ b/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp @@ -28,7 +28,8 @@ namespace { class AMDGPUAnnotateKernelFeatures : public ModulePass { private: const TargetMachine *TM; - static bool hasAddrSpaceCast(const Function &F); + AMDGPUAS AS; + static bool hasAddrSpaceCast(const Function &F, AMDGPUAS AS); void addAttrToCallers(Function *Intrin, StringRef AttrName); bool addAttrsForIntrinsics(Module &M, ArrayRef); @@ -48,10 +49,11 @@ public: ModulePass::getAnalysisUsage(AU); } - static bool visitConstantExpr(const ConstantExpr *CE); + static bool visitConstantExpr(const ConstantExpr *CE, AMDGPUAS AS); static bool visitConstantExprsRecursively( const Constant *EntryC, - SmallPtrSet &ConstantExprVisited); + SmallPtrSet &ConstantExprVisited, + AMDGPUAS AS); }; } @@ -65,18 +67,20 @@ INITIALIZE_PASS(AMDGPUAnnotateKernelFeatures, DEBUG_TYPE, // The queue ptr is only needed when casting to flat, not from it. -static bool castRequiresQueuePtr(unsigned SrcAS) { - return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS; +static bool castRequiresQueuePtr(unsigned SrcAS, const AMDGPUAS &AS) { + return SrcAS == AS.LOCAL_ADDRESS || SrcAS == AS.PRIVATE_ADDRESS; } -static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC) { - return castRequiresQueuePtr(ASC->getSrcAddressSpace()); +static bool castRequiresQueuePtr(const AddrSpaceCastInst *ASC, + const AMDGPUAS &AS) { + return castRequiresQueuePtr(ASC->getSrcAddressSpace(), AS); } -bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) { +bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE, + AMDGPUAS AS) { if (CE->getOpcode() == Instruction::AddrSpaceCast) { unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace(); - return castRequiresQueuePtr(SrcAS); + return castRequiresQueuePtr(SrcAS, AS); } return false; @@ -84,7 +88,8 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExpr(const ConstantExpr *CE) { bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively( const Constant *EntryC, - SmallPtrSet &ConstantExprVisited) { + SmallPtrSet &ConstantExprVisited, + AMDGPUAS AS) { if (!ConstantExprVisited.insert(EntryC).second) return false; @@ -97,7 +102,7 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively( // Check this constant expression. if (const auto *CE = dyn_cast(C)) { - if (visitConstantExpr(CE)) + if (visitConstantExpr(CE, AS)) return true; } @@ -118,13 +123,14 @@ bool AMDGPUAnnotateKernelFeatures::visitConstantExprsRecursively( } // Return true if an addrspacecast is used that requires the queue ptr. 
-bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) { +bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F, + AMDGPUAS AS) { SmallPtrSet ConstantExprVisited; for (const BasicBlock &BB : F) { for (const Instruction &I : BB) { if (const AddrSpaceCastInst *ASC = dyn_cast(&I)) { - if (castRequiresQueuePtr(ASC)) + if (castRequiresQueuePtr(ASC, AS)) return true; } @@ -133,7 +139,7 @@ bool AMDGPUAnnotateKernelFeatures::hasAddrSpaceCast(const Function &F) { if (!OpC) continue; - if (visitConstantExprsRecursively(OpC, ConstantExprVisited)) + if (visitConstantExprsRecursively(OpC, ConstantExprVisited, AS)) return true; } } @@ -173,6 +179,7 @@ bool AMDGPUAnnotateKernelFeatures::addAttrsForIntrinsics( bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) { Triple TT(M.getTargetTriple()); + AS = AMDGPU::getAMDGPUAS(M); static const StringRef IntrinsicToAttr[][2] = { // .x omitted @@ -216,7 +223,7 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) { bool HasApertureRegs = TM && TM->getSubtarget(F).hasApertureRegs(); - if (!HasApertureRegs && hasAddrSpaceCast(F)) + if (!HasApertureRegs && hasAddrSpaceCast(F, AS)) F.addFnAttr("amdgpu-queue-ptr"); } } diff --git a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp index c011be6fa16..91b3649f5c3 100644 --- a/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp +++ b/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp @@ -37,6 +37,7 @@ class AMDGPUAnnotateUniformValues : public FunctionPass, LoopInfo *LI; DenseMap noClobberClones; bool isKernelFunc; + AMDGPUAS AMDGPUASI; public: static char ID; @@ -130,8 +131,8 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) { Value *Ptr = I.getPointerOperand(); if (!DA->isUniform(Ptr)) return; - auto isGlobalLoad = [](LoadInst &Load)->bool { - return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; + auto isGlobalLoad = [&](LoadInst &Load)->bool { + return Load.getPointerAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS; }; // We're tracking up to the Function boundaries // We cannot go beyond because of FunctionPass restrictions @@ -166,6 +167,7 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) { } bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) { + AMDGPUASI = AMDGPU::getAMDGPUAS(M); return false; } diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index accfd698108..4f2a0ca2cd0 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -17,6 +17,7 @@ // #include "AMDGPUAsmPrinter.h" +#include "AMDGPUTargetMachine.h" #include "MCTargetDesc/AMDGPUTargetStreamer.h" #include "InstPrinter/AMDGPUInstPrinter.h" #include "Utils/AMDGPUBaseInfo.h" @@ -92,7 +93,9 @@ extern "C" void LLVMInitializeAMDGPUAsmPrinter() { AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) - : AsmPrinter(TM, std::move(Streamer)) {} + : AsmPrinter(TM, std::move(Streamer)) { + AMDGPUASI = static_cast(&TM)->getAMDGPUAS(); + } StringRef AMDGPUAsmPrinter::getPassName() const { return "AMDGPU Assembly Printer"; @@ -174,7 +177,7 @@ void AMDGPUAsmPrinter::EmitFunctionEntryLabel() { void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // Group segment variables aren't emitted in HSA. 
- if (AMDGPU::isGroupSegment(GV)) + if (AMDGPU::isGroupSegment(GV, AMDGPUASI)) return; AsmPrinter::EmitGlobalVariable(GV); diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index be476e3f14d..13425c8b2a0 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -16,6 +16,7 @@ #define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H #include "AMDKernelCodeT.h" +#include "AMDGPU.h" #include "llvm/ADT/StringRef.h" #include "llvm/CodeGen/AsmPrinter.h" #include @@ -150,6 +151,7 @@ public: protected: std::vector DisasmLines, HexLines; size_t DisasmLineMaxLen; + AMDGPUAS AMDGPUASI; }; } // end namespace llvm diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp index ce70d150e52..e67ae092fdd 100644 --- a/lib/Target/AMDGPU/AMDGPUCallLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUCallLowering.cpp @@ -31,7 +31,7 @@ using namespace llvm; #endif AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI) - : CallLowering(&TLI) { + : CallLowering(&TLI), AMDGPUASI(TLI.getAMDGPUAS()) { } bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder, @@ -49,7 +49,7 @@ unsigned AMDGPUCallLowering::lowerParameterPtr(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI = MF.getRegInfo(); const Function &F = *MF.getFunction(); const DataLayout &DL = F.getParent()->getDataLayout(); - PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS); + PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS); LLT PtrType = getLLTForType(*PtrTy, DL); unsigned DstReg = MRI.createGenericVirtualRegister(PtrType); unsigned KernArgSegmentPtr = @@ -70,7 +70,7 @@ void AMDGPUCallLowering::lowerParameter(MachineIRBuilder &MIRBuilder, MachineFunction &MF = MIRBuilder.getMF(); const Function &F = *MF.getFunction(); const DataLayout &DL = F.getParent()->getDataLayout(); - PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUAS::CONSTANT_ADDRESS); + PointerType *PtrTy = PointerType::get(ParamTy, AMDGPUASI.CONSTANT_ADDRESS); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); unsigned TypeSize = DL.getTypeStoreSize(ParamTy); unsigned Align = DL.getABITypeAlignment(ParamTy); diff --git a/lib/Target/AMDGPU/AMDGPUCallLowering.h b/lib/Target/AMDGPU/AMDGPUCallLowering.h index b5f3fa5617b..09bdf8ffcde 100644 --- a/lib/Target/AMDGPU/AMDGPUCallLowering.h +++ b/lib/Target/AMDGPU/AMDGPUCallLowering.h @@ -15,6 +15,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUCALLLOWERING_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUCALLLOWERING_H +#include "AMDGPU.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" namespace llvm { @@ -22,6 +23,7 @@ namespace llvm { class AMDGPUTargetLowering; class AMDGPUCallLowering: public CallLowering { + AMDGPUAS AMDGPUASI; unsigned lowerParameterPtr(MachineIRBuilder &MIRBuilder, Type *ParamTy, unsigned Offset) const; diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index fddf94339a1..ca695c1d53c 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -67,10 +67,13 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel { // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can // make the right decision when generating code for different targets. 
const AMDGPUSubtarget *Subtarget; + AMDGPUAS AMDGPUASI; public: explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(TM, OptLevel) {} + : SelectionDAGISel(TM, OptLevel){ + AMDGPUASI = AMDGPU::getAMDGPUAS(TM); + } ~AMDGPUDAGToDAGISel() override = default; bool runOnMachineFunction(MachineFunction &MF) override; @@ -269,7 +272,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const { if (Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS || - cast(N)->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) + cast(N)->getAddressSpace() != AMDGPUASI.LOCAL_ADDRESS) return N; const SITargetLowering& Lowering = @@ -586,9 +589,9 @@ bool AMDGPUDAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { if (!N->readMem()) return false; if (CbId == -1) - return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS; + return N->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS; - return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId; + return N->getAddressSpace() == AMDGPUASI.CONSTANT_BUFFER_0 + CbId; } bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const { @@ -1536,7 +1539,7 @@ void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) { void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) { MemSDNode *Mem = cast(N); unsigned AS = Mem->getAddressSpace(); - if (AS == AMDGPUAS::FLAT_ADDRESS) { + if (AS == AMDGPUASI.FLAT_ADDRESS) { SelectCode(N); return; } diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index b57cc00a71f..af3c9ff28df 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -59,6 +59,7 @@ EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) { AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, const AMDGPUSubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { + AMDGPUASI = AMDGPU::getAMDGPUAS(TM); // Lower floating point store/load to integer store/load to reduce the number // of patterns in tablegen. setOperationAction(ISD::LOAD, MVT::f32, Promote); @@ -967,19 +968,16 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, GlobalAddressSDNode *G = cast(Op); const GlobalValue *GV = G->getGlobal(); - switch (G->getAddressSpace()) { - case AMDGPUAS::LOCAL_ADDRESS: { + if (G->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS) { // XXX: What does the value of G->getOffset() mean? assert(G->getOffset() == 0 && "Do not know what to do with an non-zero offset"); // TODO: We could emit code to handle the initialization somewhere. 
- if (hasDefinedInitializer(GV)) - break; - - unsigned Offset = MFI->allocateLDSGlobal(DL, *GV); - return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType()); - } + if (!hasDefinedInitializer(GV)) { + unsigned Offset = MFI->allocateLDSGlobal(DL, *GV); + return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType()); + } } const Function &Fn = *DAG.getMachineFunction().getFunction(); diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h index a41200ceb21..73860383fd4 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -16,6 +16,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUISELLOWERING_H +#include "AMDGPU.h" #include "llvm/Target/TargetLowering.h" namespace llvm { @@ -34,6 +35,7 @@ private: protected: const AMDGPUSubtarget *Subtarget; + AMDGPUAS AMDGPUASI; SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const; @@ -224,6 +226,10 @@ public: /// type of implicit parameter. uint32_t getImplicitParameterOffset(const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const; + + AMDGPUAS getAMDGPUAS() const { + return AMDGPUASI; + } }; namespace AMDGPUISD { diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp index a3abb96fb94..a01f5d37c7c 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp @@ -30,7 +30,7 @@ using namespace llvm; void AMDGPUInstrInfo::anchor() {} AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST) - : AMDGPUGenInstrInfo(-1, -1), ST(ST) {} + : AMDGPUGenInstrInfo(-1, -1), ST(ST), AMDGPUASI(ST.getAMDGPUAS()) {} // FIXME: This behaves strangely. 
If, for example, you have 32 load + stores, // the first 16 loads will be interleaved with the stores, and the next 16 will diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/lib/Target/AMDGPU/AMDGPUInstrInfo.h index bd8e389639f..a122fd612ec 100644 --- a/lib/Target/AMDGPU/AMDGPUInstrInfo.h +++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.h @@ -16,6 +16,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H +#include "AMDGPU.h" #include "llvm/Target/TargetInstrInfo.h" #include "Utils/AMDGPUBaseInfo.h" @@ -35,6 +36,8 @@ private: const AMDGPUSubtarget &ST; virtual void anchor(); +protected: + AMDGPUAS AMDGPUASI; public: explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st); diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index d133851c852..8867ed689a3 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -33,7 +33,7 @@ using namespace llvm; AMDGPUInstructionSelector::AMDGPUInstructionSelector( const SISubtarget &STI, const AMDGPURegisterBankInfo &RBI) : InstructionSelector(), TII(*STI.getInstrInfo()), - TRI(*STI.getRegisterInfo()), RBI(RBI) {} + TRI(*STI.getRegisterInfo()), RBI(RBI), AMDGPUASI(STI.getAMDGPUAS()) {} MachineOperand AMDGPUInstructionSelector::getSubOperand64(MachineOperand &MO, @@ -291,7 +291,7 @@ bool AMDGPUInstructionSelector::selectSMRD(MachineInstr &I, if (!I.hasOneMemOperand()) return false; - if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUAS::CONSTANT_ADDRESS) + if ((*I.memoperands_begin())->getAddrSpace() != AMDGPUASI.CONSTANT_ADDRESS) return false; if (!isInstrUniform(I)) diff --git a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h index 783f1408b3d..c87102e55df 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.h +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.h @@ -14,6 +14,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRUCTIONSELECTOR_H +#include "AMDGPU.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" @@ -35,7 +36,6 @@ public: const AMDGPURegisterBankInfo &RBI); bool select(MachineInstr &I) const override; - private: struct GEPInfo { const MachineInstr &GEP; @@ -59,6 +59,8 @@ private: const SIInstrInfo &TII; const SIRegisterInfo &TRI; const AMDGPURegisterBankInfo &RBI; +protected: + AMDGPUAS AMDGPUASI; }; } // End llvm namespace. 
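Stepping back from the individual hunks: the pattern repeated throughout the patch is a small value type whose triple-dependent address spaces are ordinary fields and whose target-independent ones stay static const, filled in once by getAMDGPUAS(). Here is a rough standalone sketch of that shape. The factory body is an assumption pieced together from the asserts in AMDGPUAliasAnalysis.cpp and the amdgiz/amdgizcl environments named in the commit message; the actual getAMDGPUAS implementation is not part of this diff.

#include <string>

struct AddrSpaces {                  // simplified stand-in for AMDGPUAS
  // Depend on the triple environment.
  unsigned PRIVATE_ADDRESS;
  unsigned CONSTANT_ADDRESS;
  unsigned FLAT_ADDRESS;
  unsigned REGION_ADDRESS;
  // Do not depend on the triple: compile-time constants, no per-object storage.
  static const unsigned GLOBAL_ADDRESS = 1;
  static const unsigned LOCAL_ADDRESS = 3;
  static const unsigned UNKNOWN_ADDRESS_SPACE = ~0u;
};

// Hypothetical factory mirroring AMDGPU::getAMDGPUAS(Triple): the amdgiz and
// amdgizcl environments make flat/generic address space 0, otherwise private is 0.
inline AddrSpaces getAddrSpaces(const std::string &Env) {
  AddrSpaces AS;
  if (Env == "amdgiz" || Env == "amdgizcl") {
    AS.FLAT_ADDRESS = 0;
    AS.REGION_ADDRESS = 2;
    AS.CONSTANT_ADDRESS = 4;
    AS.PRIVATE_ADDRESS = 5;
  } else {
    AS.PRIVATE_ADDRESS = 0;
    AS.CONSTANT_ADDRESS = 2;
    AS.FLAT_ADDRESS = 4;
    AS.REGION_ADDRESS = 5;
  }
  return AS;
}

int main() {
  AddrSpaces AS = getAddrSpaces("amdgiz");   // cheap: create at the point of use
  return AS.FLAT_ADDRESS == 0 ? 0 : 1;
}

Because every member is either a plain unsigned field or a static constant, copying the struct into subtargets, target machines and passes (as the hunks above do) is essentially free.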
diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td index c4ac3180453..b8d681298de 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -204,7 +204,7 @@ def COND_NULL : PatLeaf < //===----------------------------------------------------------------------===// class PrivateMemOp : PatFrag (N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS; + return cast(N)->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS; }]>; class PrivateLoad : PrivateMemOp < @@ -222,7 +222,7 @@ def truncstorei16_private : PrivateStore ; def store_private : PrivateStore ; class GlobalMemOp : PatFrag (N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; + return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS; }]>; // Global address space loads @@ -242,7 +242,7 @@ def global_store_atomic : GlobalStore; class ConstantMemOp : PatFrag (N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS; + return cast(N)->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS; }]>; // Constant address space loads @@ -253,7 +253,7 @@ class ConstantLoad : ConstantMemOp < def constant_load : ConstantLoad; class LocalMemOp : PatFrag (N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; + return cast(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS; }]>; // Local address space loads @@ -266,7 +266,7 @@ class LocalStore : LocalMemOp < >; class FlatMemOp : PatFrag (N)->getAddressSPace() == AMDGPUAS::FLAT_ADDRESS; + return cast(N)->getAddressSPace() == AMDGPUASI.FLAT_ADDRESS; }]>; class FlatLoad : FlatMemOp < @@ -348,7 +348,7 @@ def local_store_aligned8bytes : Aligned8Bytes < class local_binary_atomic_op : PatFrag<(ops node:$ptr, node:$value), (atomic_op node:$ptr, node:$value), [{ - return cast(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; + return cast(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS; }]>; @@ -366,7 +366,7 @@ def atomic_load_umax_local : local_binary_atomic_op; def mskor_global : PatFrag<(ops node:$val, node:$ptr), (AMDGPUstore_mskor node:$val, node:$ptr), [{ - return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; + return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS; }]>; multiclass AtomicCmpSwapLocal { @@ -376,7 +376,7 @@ multiclass AtomicCmpSwapLocal { (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{ AtomicSDNode *AN = cast(N); return AN->getMemoryVT() == MVT::i32 && - AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; + AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS; }]>; def _64_local : PatFrag< @@ -384,7 +384,7 @@ multiclass AtomicCmpSwapLocal { (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{ AtomicSDNode *AN = cast(N); return AN->getMemoryVT() == MVT::i64 && - AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; + AN->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS; }]>; } @@ -394,17 +394,17 @@ multiclass global_binary_atomic_op { def "" : PatFrag< (ops node:$ptr, node:$value), (atomic_op node:$ptr, node:$value), - [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>; + [{return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>; def _noret : PatFrag< (ops node:$ptr, node:$value), (atomic_op node:$ptr, node:$value), - [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>; + [{return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>; def _ret : PatFrag< (ops node:$ptr, node:$value), (atomic_op node:$ptr, node:$value), - [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 
0).use_empty());}]>; + [{return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>; } defm atomic_swap_global : global_binary_atomic_op; @@ -422,22 +422,22 @@ defm atomic_xor_global : global_binary_atomic_op; def AMDGPUatomic_cmp_swap_global : PatFrag< (ops node:$ptr, node:$value), (AMDGPUatomic_cmp_swap node:$ptr, node:$value), - [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>; + [{return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>; def atomic_cmp_swap_global : PatFrag< (ops node:$ptr, node:$cmp, node:$value), (atomic_cmp_swap node:$ptr, node:$cmp, node:$value), - [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>; + [{return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS;}]>; def atomic_cmp_swap_global_noret : PatFrag< (ops node:$ptr, node:$cmp, node:$value), (atomic_cmp_swap node:$ptr, node:$cmp, node:$value), - [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>; + [{return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>; def atomic_cmp_swap_global_ret : PatFrag< (ops node:$ptr, node:$cmp, node:$value), (atomic_cmp_swap node:$ptr, node:$cmp, node:$value), - [{return cast(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>; + [{return cast(N)->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>; //===----------------------------------------------------------------------===// // Misc Pattern Fragments diff --git a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp index e46b7ff554b..96bc53d06cd 100644 --- a/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp +++ b/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp @@ -72,6 +72,7 @@ private: Module *Mod = nullptr; const DataLayout *DL = nullptr; MDNode *MaxWorkGroupSizeRange = nullptr; + AMDGPUAS AS; // FIXME: This should be per-kernel. uint32_t LocalMemLimit = 0; @@ -154,6 +155,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) { const AMDGPUSubtarget &ST = TM->getSubtarget(F); if (!ST.isPromoteAllocaEnabled()) return false; + AS = AMDGPU::getAMDGPUAS(*F.getParent()); FunctionType *FTy = F.getFunctionType(); @@ -162,7 +164,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) { // we cannot use local memory in the pass. for (Type *ParamTy : FTy->params()) { PointerType *PtrTy = dyn_cast(ParamTy); - if (PtrTy && PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { + if (PtrTy && PtrTy->getAddressSpace() == AS.LOCAL_ADDRESS) { LocalMemLimit = 0; DEBUG(dbgs() << "Function has local memory argument. 
Promoting to " "local memory disabled.\n"); @@ -179,7 +181,7 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) { // Check how much local memory is being used by global objects CurrentLocalMemUsage = 0; for (GlobalVariable &GV : Mod->globals()) { - if (GV.getType()->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) + if (GV.getType()->getAddressSpace() != AS.LOCAL_ADDRESS) continue; for (const User *U : GV.users()) { @@ -317,7 +319,7 @@ AMDGPUPromoteAlloca::getLocalSizeYZ(IRBuilder<> &Builder) { Type *I32Ty = Type::getInt32Ty(Mod->getContext()); Value *CastDispatchPtr = Builder.CreateBitCast( - DispatchPtr, PointerType::get(I32Ty, AMDGPUAS::CONSTANT_ADDRESS)); + DispatchPtr, PointerType::get(I32Ty, AS.CONSTANT_ADDRESS)); // We could do a single 64-bit load here, but it's likely that the basic // 32-bit and extract sequence is already present, and it is probably easier @@ -413,7 +415,7 @@ static bool canVectorizeInst(Instruction *Inst, User *User) { } } -static bool tryPromoteAllocaToVector(AllocaInst *Alloca) { +static bool tryPromoteAllocaToVector(AllocaInst *Alloca, AMDGPUAS AS) { ArrayType *AllocaTy = dyn_cast(Alloca->getAllocatedType()); DEBUG(dbgs() << "Alloca candidate for vectorization\n"); @@ -468,7 +470,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) { IRBuilder<> Builder(Inst); switch (Inst->getOpcode()) { case Instruction::Load: { - Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS); + Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS); Value *Ptr = Inst->getOperand(0); Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); @@ -480,7 +482,7 @@ static bool tryPromoteAllocaToVector(AllocaInst *Alloca) { break; } case Instruction::Store: { - Type *VecPtrTy = VectorTy->getPointerTo(AMDGPUAS::PRIVATE_ADDRESS); + Type *VecPtrTy = VectorTy->getPointerTo(AS.PRIVATE_ADDRESS); Value *Ptr = Inst->getOperand(1); Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); @@ -673,7 +675,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { DEBUG(dbgs() << "Trying to promote " << I << '\n'); - if (tryPromoteAllocaToVector(&I)) { + if (tryPromoteAllocaToVector(&I, AS)) { DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n"); return; } @@ -734,7 +736,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { Twine(F->getName()) + Twine('.') + I.getName(), nullptr, GlobalVariable::NotThreadLocal, - AMDGPUAS::LOCAL_ADDRESS); + AS.LOCAL_ADDRESS); GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); GV->setAlignment(I.getAlignment()); @@ -767,7 +769,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { if (ICmpInst *CI = dyn_cast(V)) { Value *Src0 = CI->getOperand(0); Type *EltTy = Src0->getType()->getPointerElementType(); - PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS); + PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS); if (isa(CI->getOperand(0))) CI->setOperand(0, ConstantPointerNull::get(NewTy)); @@ -784,7 +786,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { continue; Type *EltTy = V->getType()->getPointerElementType(); - PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS); + PointerType *NewTy = PointerType::get(EltTy, AS.LOCAL_ADDRESS); // FIXME: It doesn't really make sense to try to do this for all // instructions. 
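AMDGPUPromoteAlloca above shows the second usage style from the commit message: keep the struct as a pass member, fill it at the top of runOnFunction, and read AS.LOCAL_ADDRESS / AS.PRIVATE_ADDRESS wherever the old enum values were used. A rough standalone sketch of that shape; the pass class and the factory here are hypothetical stand-ins so the example compiles without the LLVM pass machinery.

#include <cstdio>
#include <string>

struct AddrSpaces { unsigned LOCAL_ADDRESS; unsigned PRIVATE_ADDRESS; };

// Hypothetical: in the real pass this is AMDGPU::getAMDGPUAS(*F.getParent()).
AddrSpaces getAddrSpacesFor(const std::string &TripleEnv) {
  // LOCAL_ADDRESS stays 3 in both numberings; PRIVATE_ADDRESS moves to 5 for amdgiz.
  return TripleEnv == "amdgiz" ? AddrSpaces{3, 5} : AddrSpaces{3, 0};
}

class PromoteAllocaLikePass {
  AddrSpaces AS;                       // pass member instead of a global enum
public:
  bool runOnFunction(const std::string &ModuleTripleEnv) {
    AS = getAddrSpacesFor(ModuleTripleEnv);   // set once at the start of the run
    // Later, address-space numbers come from the member, e.g. when a promoted
    // alloca is replaced by an LDS (local) global variable:
    std::printf("new global goes in addrspace(%u)\n", AS.LOCAL_ADDRESS);
    return false;
  }
};

int main() { return PromoteAllocaLikePass().runOnFunction("amdgiz") ? 1 : 0; }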
@@ -852,7 +854,7 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) { Type *SrcTy = Src->getType()->getPointerElementType(); Function *ObjectSize = Intrinsic::getDeclaration(Mod, Intrinsic::objectsize, - { Intr->getType(), PointerType::get(SrcTy, AMDGPUAS::LOCAL_ADDRESS) } + { Intr->getType(), PointerType::get(SrcTy, AS.LOCAL_ADDRESS) } ); CallInst *NewCall = Builder.CreateCall( diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index a4bb8b9a5e5..92825684d90 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -135,6 +135,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, FeatureDisable(false), InstrItins(getInstrItineraryForCPU(GPU)) { + AS = AMDGPU::getAMDGPUAS(TT); initializeSubtargetDependencies(TT, GPU, FS); } diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 39289d0f149..c61a2ff818f 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -157,6 +157,7 @@ protected: InstrItineraryData InstrItins; SelectionDAGTargetInfo TSInfo; + AMDGPUAS AS; public: AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, @@ -214,6 +215,10 @@ public: return MaxPrivateElementSize; } + AMDGPUAS getAMDGPUAS() const { + return AS; + } + bool has16BitInsts() const { return Has16BitInsts; } diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 68e78d50ef0..e8954c59479 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -240,6 +240,7 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT, : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU), FS, Options, getEffectiveRelocModel(RM), CM, OptLevel), TLOF(createTLOF(getTargetTriple())) { + AS = AMDGPU::getAMDGPUAS(TT); initAsmInfo(); } @@ -809,3 +810,4 @@ void GCNPassConfig::addPreEmitPass() { TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) { return new GCNPassConfig(this, PM); } + diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/lib/Target/AMDGPU/AMDGPUTargetMachine.h index e36e940532c..934bf7f31ba 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -35,6 +35,7 @@ class AMDGPUTargetMachine : public LLVMTargetMachine { protected: std::unique_ptr TLOF; AMDGPUIntrinsicInfo IntrinsicInfo; + AMDGPUAS AS; StringRef getGPUName(const Function &F) const; StringRef getFeatureString(const Function &F) const; @@ -57,17 +58,16 @@ public: TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } + AMDGPUAS getAMDGPUAS() const { + return AS; + } void adjustPassManager(PassManagerBuilder &) override; /// Get the integer value of a null pointer in the given address space. 
uint64_t getNullPointerValue(unsigned AddrSpace) const { - switch(AddrSpace) { - case AMDGPUAS::LOCAL_ADDRESS: - case AMDGPUAS::REGION_ADDRESS: + if (AddrSpace == AS.LOCAL_ADDRESS || AddrSpace == AS.REGION_ADDRESS) return -1; - default: - return 0; - } + return 0; } }; diff --git a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp index 1fddc88a705..c96761c0b04 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "AMDGPUTargetMachine.h" #include "AMDGPUTargetObjectFile.h" #include "AMDGPU.h" #include "llvm/MC/MCContext.h" @@ -22,7 +23,8 @@ using namespace llvm; MCSection *AMDGPUTargetObjectFile::SelectSectionForGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { - if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GO) && + auto AS = static_cast(&TM)->getAMDGPUAS(); + if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GO, AS) && AMDGPU::shouldEmitConstantsToTextSection(TM.getTargetTriple())) return TextSection; diff --git a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h index de327786dff..ca6210f6929 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h +++ b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h @@ -16,6 +16,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H #define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H +#include "AMDGPU.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Target/TargetMachine.h" diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp index 4a6d12bd883..c5b7086dd48 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp @@ -48,7 +48,7 @@ void AMDGPUTTIImpl::getUnrollingPreferences(Loop *L, const DataLayout &DL = BB->getModule()->getDataLayout(); for (const Instruction &I : *BB) { const GetElementPtrInst *GEP = dyn_cast(&I); - if (!GEP || GEP->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) + if (!GEP || GEP->getAddressSpace() != ST->getAMDGPUAS().PRIVATE_ADDRESS) continue; const Value *Ptr = GEP->getPointerOperand(); @@ -108,25 +108,24 @@ unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool Vector) { } unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { - switch (AddrSpace) { - case AMDGPUAS::GLOBAL_ADDRESS: - case AMDGPUAS::CONSTANT_ADDRESS: - case AMDGPUAS::FLAT_ADDRESS: + AMDGPUAS AS = ST->getAMDGPUAS(); + if (AddrSpace == AS.GLOBAL_ADDRESS || + AddrSpace == AS.CONSTANT_ADDRESS || + AddrSpace == AS.FLAT_ADDRESS) return 128; - case AMDGPUAS::LOCAL_ADDRESS: - case AMDGPUAS::REGION_ADDRESS: + if (AddrSpace == AS.LOCAL_ADDRESS || + AddrSpace == AS.REGION_ADDRESS) return 64; - case AMDGPUAS::PRIVATE_ADDRESS: + if (AddrSpace == AS.PRIVATE_ADDRESS) return 8 * ST->getMaxPrivateElementSize(); - default: - if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS && - (AddrSpace == AMDGPUAS::PARAM_D_ADDRESS || - AddrSpace == AMDGPUAS::PARAM_I_ADDRESS || - (AddrSpace >= AMDGPUAS::CONSTANT_BUFFER_0 && - AddrSpace <= AMDGPUAS::CONSTANT_BUFFER_15))) - return 128; - llvm_unreachable("unhandled address space"); - } + + if (ST->getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS && + (AddrSpace == AS.PARAM_D_ADDRESS || + AddrSpace == AS.PARAM_I_ADDRESS || + (AddrSpace >= AS.CONSTANT_BUFFER_0 && + AddrSpace <= 
AS.CONSTANT_BUFFER_15))) + return 128; + llvm_unreachable("unhandled address space"); } bool AMDGPUTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, @@ -135,7 +134,7 @@ bool AMDGPUTTIImpl::isLegalToVectorizeMemChain(unsigned ChainSizeInBytes, // We allow vectorization of flat stores, even though we may need to decompose // them later if they may access private memory. We don't have enough context // here, and legalization can handle it. - if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) { + if (AddrSpace == ST->getAMDGPUAS().PRIVATE_ADDRESS) { return (Alignment >= 4 || ST->hasUnalignedScratchAccess()) && ChainSizeInBytes <= ST->getMaxPrivateElementSize(); } @@ -362,7 +361,7 @@ bool AMDGPUTTIImpl::isSourceOfDivergence(const Value *V) const { // All other loads are not divergent, because if threads issue loads with the // same arguments, they will always get the same result. if (const LoadInst *Load = dyn_cast(V)) - return Load->getPointerAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS; + return Load->getPointerAddressSpace() == ST->getAMDGPUAS().PRIVATE_ADDRESS; // Atomics are divergent because they are executed sequentially: when an // atomic operation refers to the same address in each thread, then each diff --git a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h index c64c4bf5f6a..71d6306bc1a 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h +++ b/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h @@ -110,7 +110,7 @@ public: if (IsGraphicsShader) return -1; return ST->hasFlatAddressSpace() ? - AMDGPUAS::FLAT_ADDRESS : AMDGPUAS::UNKNOWN_ADDRESS_SPACE; + ST->getAMDGPUAS().FLAT_ADDRESS : ST->getAMDGPUAS().UNKNOWN_ADDRESS_SPACE; } unsigned getVectorSplitCost() { return 0; } diff --git a/lib/Target/AMDGPU/BUFInstructions.td b/lib/Target/AMDGPU/BUFInstructions.td index 45a7fe6d343..29f5eef67ec 100644 --- a/lib/Target/AMDGPU/BUFInstructions.td +++ b/lib/Target/AMDGPU/BUFInstructions.td @@ -21,8 +21,8 @@ def MUBUFIntrinsicVOffset : ComplexPattern : PatFrag < (ops node:$ptr), (op node:$ptr), [{ auto const AS = cast(N)->getAddressSpace(); - return AS == AMDGPUAS::GLOBAL_ADDRESS || - AS == AMDGPUAS::CONSTANT_ADDRESS; + return AS == AMDGPUASI.GLOBAL_ADDRESS || + AS == AMDGPUASI.CONSTANT_ADDRESS; }]>; def mubuf_load : MubufLoad ; diff --git a/lib/Target/AMDGPU/FLATInstructions.td b/lib/Target/AMDGPU/FLATInstructions.td index 849fb8ad50f..b0ac0e689a0 100644 --- a/lib/Target/AMDGPU/FLATInstructions.td +++ b/lib/Target/AMDGPU/FLATInstructions.td @@ -136,7 +136,7 @@ multiclass FLAT_Atomic_Pseudo< class flat_binary_atomic_op : PatFrag< (ops node:$ptr, node:$value), (atomic_op node:$ptr, node:$value), - [{return cast(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}] + [{return cast(N)->getAddressSpace() == AMDGPUASI.FLAT_ADDRESS;}] >; def atomic_cmp_swap_flat : flat_binary_atomic_op; @@ -284,16 +284,16 @@ defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2", class flat_ld : PatFrag<(ops node:$ptr), (ld node:$ptr), [{ auto const AS = cast(N)->getAddressSpace(); - return AS == AMDGPUAS::FLAT_ADDRESS || - AS == AMDGPUAS::GLOBAL_ADDRESS || - AS == AMDGPUAS::CONSTANT_ADDRESS; + return AS == AMDGPUASI.FLAT_ADDRESS || + AS == AMDGPUASI.GLOBAL_ADDRESS || + AS == AMDGPUASI.CONSTANT_ADDRESS; }]>; class flat_st : PatFrag<(ops node:$val, node:$ptr), (st node:$val, node:$ptr), [{ auto const AS = cast(N)->getAddressSpace(); - return AS == AMDGPUAS::FLAT_ADDRESS || - AS == AMDGPUAS::GLOBAL_ADDRESS; + return AS == AMDGPUASI.FLAT_ADDRESS || + AS == 
AMDGPUASI.GLOBAL_ADDRESS; }]>; def atomic_flat_load : flat_ld ; diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp index 7b4f25106e7..59571a48a96 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.cpp @@ -264,20 +264,18 @@ AccessQualifier MetadataStreamer::getAccessQualifier(StringRef AccQual) const { AddressSpaceQualifier MetadataStreamer::getAddressSpaceQualifer( unsigned AddressSpace) const { - switch (AddressSpace) { - case AMDGPUAS::PRIVATE_ADDRESS: + if (AddressSpace == AMDGPUASI.PRIVATE_ADDRESS) return AddressSpaceQualifier::Private; - case AMDGPUAS::GLOBAL_ADDRESS: + if (AddressSpace == AMDGPUASI.GLOBAL_ADDRESS) return AddressSpaceQualifier::Global; - case AMDGPUAS::CONSTANT_ADDRESS: + if (AddressSpace == AMDGPUASI.CONSTANT_ADDRESS) return AddressSpaceQualifier::Constant; - case AMDGPUAS::LOCAL_ADDRESS: + if (AddressSpace == AMDGPUASI.LOCAL_ADDRESS) return AddressSpaceQualifier::Local; - case AMDGPUAS::FLAT_ADDRESS: + if (AddressSpace == AMDGPUASI.FLAT_ADDRESS) return AddressSpaceQualifier::Generic; - case AMDGPUAS::REGION_ADDRESS: + if (AddressSpace == AMDGPUASI.REGION_ADDRESS) return AddressSpaceQualifier::Region; - } llvm_unreachable("Unknown address space qualifier"); } @@ -304,7 +302,7 @@ ValueKind MetadataStreamer::getValueKind(Type *Ty, StringRef TypeQual, "image3d_t", ValueKind::Image) .Default(isa(Ty) ? (Ty->getPointerAddressSpace() == - AMDGPUAS::LOCAL_ADDRESS ? + AMDGPUASI.LOCAL_ADDRESS ? ValueKind::DynamicSharedPointer : ValueKind::GlobalBuffer) : ValueKind::ByValue); @@ -460,7 +458,7 @@ void MetadataStreamer::emitKernelArgs(const Function &Func) { return; auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(), - AMDGPUAS::GLOBAL_ADDRESS); + AMDGPUASI.GLOBAL_ADDRESS); emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer); } @@ -513,7 +511,7 @@ void MetadataStreamer::emitKernelArg(const DataLayout &DL, Type *Ty, if (auto PtrTy = dyn_cast(Ty)) { auto ElTy = PtrTy->getElementType(); - if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ElTy->isSized()) + if (PtrTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS && ElTy->isSized()) Arg.mPointeeAlign = DL.getABITypeAlignment(ElTy); } @@ -576,6 +574,7 @@ void MetadataStreamer::emitKernelDebugProps( } void MetadataStreamer::begin(const Module &Mod) { + AMDGPUASI = getAMDGPUAS(Mod); emitVersion(); emitPrintf(Mod); } diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h index 12d4c5e5dd5..8d4c51763f6 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUCodeObjectMetadataStreamer.h @@ -16,6 +16,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H #define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPUCODEOBJECTMETADATASTREAMER_H +#include "AMDGPU.h" #include "AMDGPUCodeObjectMetadata.h" #include "AMDKernelCodeT.h" #include "llvm/ADT/StringRef.h" @@ -36,6 +37,7 @@ namespace CodeObject { class MetadataStreamer final { private: Metadata CodeObjectMetadata; + AMDGPUAS AMDGPUASI; void dump(StringRef YamlString) const; diff --git a/lib/Target/AMDGPU/R600ISelLowering.cpp b/lib/Target/AMDGPU/R600ISelLowering.cpp index 812555060b2..a8db5cc13b3 100644 --- a/lib/Target/AMDGPU/R600ISelLowering.cpp +++ b/lib/Target/AMDGPU/R600ISelLowering.cpp @@ -557,7 
+557,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const } case Intrinsic::r600_implicitarg_ptr: { - MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS); + MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUASI.PARAM_I_ADDRESS); uint32_t ByteOffset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT); return DAG.getConstant(ByteOffset, DL, PtrVT); } @@ -707,12 +707,12 @@ SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const { GlobalAddressSDNode *GSD = cast(Op); - if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS) + if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS) return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG); const DataLayout &DL = DAG.getDataLayout(); const GlobalValue *GV = GSD->getGlobal(); - MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS); + MVT ConstPtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS); SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT); return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA); @@ -869,7 +869,7 @@ SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT, unsigned DwordOffset) const { unsigned ByteOffset = DwordOffset * 4; PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), - AMDGPUAS::CONSTANT_BUFFER_0); + AMDGPUASI.CONSTANT_BUFFER_0); // We shouldn't be using an offset wider than 16-bits for implicit parameters. assert(isInt<16>(ByteOffset)); @@ -1107,7 +1107,7 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store, //TODO: Who creates the i8 stores? assert(Store->isTruncatingStore() || Store->getValue().getValueType() == MVT::i8); - assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS); + assert(Store->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS); SDValue Mask; if (Store->getMemoryVT() == MVT::i8) { @@ -1205,9 +1205,10 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); // Neither LOCAL nor PRIVATE can do vectors at the moment - if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) && + if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS) && VT.isVector()) { - if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) { + if ((AS == AMDGPUASI.PRIVATE_ADDRESS) && + StoreNode->isTruncatingStore()) { // Add an extra level of chain to isolate this vector SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain); // TODO: can the chain be replaced without creating a new store? 
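Several hunks in this patch (getNullPointerValue, getLoadStoreVecRegBitWidth, getAddressSpaceQualifer, isLegalAddressingMode) turn a switch into an if/else chain. The reason is that the triple-dependent members of AMDGPUAS are no longer integral constant expressions, so they cannot appear as case labels; only the static const members such as LOCAL_ADDRESS and the CONSTANT_BUFFER_* values still can, which is why ConstantAddressBlock in the R600 hunk below keeps its switch. A small self-contained illustration, modelled loosely on getNullPointerValue above (simplified types, not the LLVM code):

#include <cassert>
#include <cstdint>

struct AddrSpaces {
  unsigned REGION_ADDRESS;                   // runtime value: not a valid case label
  static const unsigned LOCAL_ADDRESS = 3;   // still a compile-time constant
};

uint64_t getNullPointerValue(const AddrSpaces &AS, unsigned AddrSpace) {
  // switch (AddrSpace) { case AS.REGION_ADDRESS: ... } would not compile once
  // REGION_ADDRESS is an ordinary field, so the lookup becomes comparisons.
  if (AddrSpace == AddrSpaces::LOCAL_ADDRESS || AddrSpace == AS.REGION_ADDRESS)
    return ~0ull;   // local/region null pointer is all ones
  return 0;         // every other address space uses 0
}

int main() {
  AddrSpaces AS{/*REGION_ADDRESS=*/5};
  assert(getNullPointerValue(AS, 5) == ~0ull);
  assert(getNullPointerValue(AS, 1) == 0);
  return 0;
}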
@@ -1230,7 +1231,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr, DAG.getConstant(2, DL, PtrVT)); - if (AS == AMDGPUAS::GLOBAL_ADDRESS) { + if (AS == AMDGPUASI.GLOBAL_ADDRESS) { // It is beneficial to create MSKOR here instead of combiner to avoid // artificial dependencies introduced by RMW if (StoreNode->isTruncatingStore()) { @@ -1283,7 +1284,7 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { } // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes - if (AS != AMDGPUAS::PRIVATE_ADDRESS) + if (AS != AMDGPUASI.PRIVATE_ADDRESS) return SDValue(); if (MemVT.bitsLT(MVT::i32)) @@ -1302,39 +1303,39 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { // return (512 + (kc_bank << 12) static int -ConstantAddressBlock(unsigned AddressSpace) { +ConstantAddressBlock(unsigned AddressSpace, AMDGPUAS AMDGPUASI) { switch (AddressSpace) { - case AMDGPUAS::CONSTANT_BUFFER_0: + case AMDGPUASI.CONSTANT_BUFFER_0: return 512; - case AMDGPUAS::CONSTANT_BUFFER_1: + case AMDGPUASI.CONSTANT_BUFFER_1: return 512 + 4096; - case AMDGPUAS::CONSTANT_BUFFER_2: + case AMDGPUASI.CONSTANT_BUFFER_2: return 512 + 4096 * 2; - case AMDGPUAS::CONSTANT_BUFFER_3: + case AMDGPUASI.CONSTANT_BUFFER_3: return 512 + 4096 * 3; - case AMDGPUAS::CONSTANT_BUFFER_4: + case AMDGPUASI.CONSTANT_BUFFER_4: return 512 + 4096 * 4; - case AMDGPUAS::CONSTANT_BUFFER_5: + case AMDGPUASI.CONSTANT_BUFFER_5: return 512 + 4096 * 5; - case AMDGPUAS::CONSTANT_BUFFER_6: + case AMDGPUASI.CONSTANT_BUFFER_6: return 512 + 4096 * 6; - case AMDGPUAS::CONSTANT_BUFFER_7: + case AMDGPUASI.CONSTANT_BUFFER_7: return 512 + 4096 * 7; - case AMDGPUAS::CONSTANT_BUFFER_8: + case AMDGPUASI.CONSTANT_BUFFER_8: return 512 + 4096 * 8; - case AMDGPUAS::CONSTANT_BUFFER_9: + case AMDGPUASI.CONSTANT_BUFFER_9: return 512 + 4096 * 9; - case AMDGPUAS::CONSTANT_BUFFER_10: + case AMDGPUASI.CONSTANT_BUFFER_10: return 512 + 4096 * 10; - case AMDGPUAS::CONSTANT_BUFFER_11: + case AMDGPUASI.CONSTANT_BUFFER_11: return 512 + 4096 * 11; - case AMDGPUAS::CONSTANT_BUFFER_12: + case AMDGPUASI.CONSTANT_BUFFER_12: return 512 + 4096 * 12; - case AMDGPUAS::CONSTANT_BUFFER_13: + case AMDGPUASI.CONSTANT_BUFFER_13: return 512 + 4096 * 13; - case AMDGPUAS::CONSTANT_BUFFER_14: + case AMDGPUASI.CONSTANT_BUFFER_14: return 512 + 4096 * 14; - case AMDGPUAS::CONSTANT_BUFFER_15: + case AMDGPUASI.CONSTANT_BUFFER_15: return 512 + 4096 * 15; default: return -1; @@ -1402,7 +1403,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { EVT MemVT = LoadNode->getMemoryVT(); ISD::LoadExtType ExtType = LoadNode->getExtensionType(); - if (AS == AMDGPUAS::PRIVATE_ADDRESS && + if (AS == AMDGPUASI.PRIVATE_ADDRESS && ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) { return lowerPrivateExtLoad(Op, DAG); } @@ -1412,13 +1413,14 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = LoadNode->getChain(); SDValue Ptr = LoadNode->getBasePtr(); - if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || - LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) && + if ((LoadNode->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS || + LoadNode->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS) && VT.isVector()) { return scalarizeVectorLoad(LoadNode, DAG); } - int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace()); + int ConstantBlock = 
ConstantAddressBlock(LoadNode->getAddressSpace(), + AMDGPUASI); if (ConstantBlock > -1 && ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) || (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) { @@ -1450,7 +1452,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(4, DL, MVT::i32)), DAG.getConstant(LoadNode->getAddressSpace() - - AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32) + AMDGPUASI.CONSTANT_BUFFER_0, DL, MVT::i32) ); } @@ -1486,7 +1488,7 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { return DAG.getMergeValues(MergedValues, DL); } - if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { + if (LoadNode->getAddressSpace() != AMDGPUASI.PRIVATE_ADDRESS) { return SDValue(); } @@ -1563,7 +1565,7 @@ SDValue R600TargetLowering::LowerFormalArguments( } PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), - AMDGPUAS::CONSTANT_BUFFER_0); + AMDGPUASI.CONSTANT_BUFFER_0); // i64 isn't a legal type, so the register type used ends up as i32, which // isn't expected here. It attempts to create this sextload, but it ends up diff --git a/lib/Target/AMDGPU/R600Instructions.td b/lib/Target/AMDGPU/R600Instructions.td index a5d1a0afb9f..bac557ba989 100644 --- a/lib/Target/AMDGPU/R600Instructions.td +++ b/lib/Target/AMDGPU/R600Instructions.td @@ -316,7 +316,7 @@ class VTX_READ pattern> class LoadParamFrag : PatFrag < (ops node:$ptr), (load_type node:$ptr), [{ return isConstantLoad(cast(N), 0) || - (cast(N)->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS); }] + (cast(N)->getAddressSpace() == AMDGPUASI.PARAM_I_ADDRESS); }] >; def vtx_id3_az_extloadi8 : LoadParamFrag; @@ -326,8 +326,8 @@ def vtx_id3_load : LoadParamFrag; class LoadVtxId1 : PatFrag < (ops node:$ptr), (load node:$ptr), [{ const MemSDNode *LD = cast(N); - return LD->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS || - (LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && + return LD->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS || + (LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS && !isa(GetUnderlyingObject( LD->getMemOperand()->getValue(), CurDAG->getDataLayout()))); }]>; @@ -339,7 +339,7 @@ def vtx_id1_load : LoadVtxId1 ; class LoadVtxId2 : PatFrag < (ops node:$ptr), (load node:$ptr), [{ const MemSDNode *LD = cast(N); - return LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && + return LD->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS && isa(GetUnderlyingObject( LD->getMemOperand()->getValue(), CurDAG->getDataLayout())); }]>; diff --git a/lib/Target/AMDGPU/SIFrameLowering.cpp b/lib/Target/AMDGPU/SIFrameLowering.cpp index 68afcca12b8..abe6af9a6d3 100644 --- a/lib/Target/AMDGPU/SIFrameLowering.cpp +++ b/lib/Target/AMDGPU/SIFrameLowering.cpp @@ -202,6 +202,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, // Emit debugger prologue if "amdgpu-debugger-emit-prologue" attribute was // specified. 
const SISubtarget &ST = MF.getSubtarget(); + auto AMDGPUASI = ST.getAMDGPUAS(); if (ST.debuggerEmitPrologue()) emitDebuggerPrologue(MF, MBB); @@ -340,7 +341,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF, PointerType *PtrTy = PointerType::get(Type::getInt64Ty(MF.getFunction()->getContext()), - AMDGPUAS::CONSTANT_ADDRESS); + AMDGPUASI.CONSTANT_ADDRESS); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); auto MMO = MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad | diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 56734345bdd..783369c8200 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -597,8 +597,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, if (AM.BaseGV) return false; - switch (AS) { - case AMDGPUAS::GLOBAL_ADDRESS: + if (AS == AMDGPUASI.GLOBAL_ADDRESS) { if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) { // Assume the we will use FLAT for all global memory accesses // on VI. @@ -613,8 +612,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, } return isLegalMUBUFAddressingMode(AM); - - case AMDGPUAS::CONSTANT_ADDRESS: + } else if (AS == AMDGPUASI.CONSTANT_ADDRESS) { // If the offset isn't a multiple of 4, it probably isn't going to be // correctly aligned. // FIXME: Can we get the real alignment here? @@ -652,11 +650,10 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, return false; - case AMDGPUAS::PRIVATE_ADDRESS: + } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) { return isLegalMUBUFAddressingMode(AM); - - case AMDGPUAS::LOCAL_ADDRESS: - case AMDGPUAS::REGION_ADDRESS: + } else if (AS == AMDGPUASI.LOCAL_ADDRESS || + AS == AMDGPUASI.REGION_ADDRESS) { // Basic, single offset DS instructions allow a 16-bit unsigned immediate // field. // XXX - If doing a 4-byte aligned 8-byte type access, we effectively have @@ -671,17 +668,15 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL, return true; return false; - - case AMDGPUAS::FLAT_ADDRESS: - case AMDGPUAS::UNKNOWN_ADDRESS_SPACE: + } else if (AS == AMDGPUASI.FLAT_ADDRESS || + AS == AMDGPUASI.UNKNOWN_ADDRESS_SPACE) { // For an unknown address space, this usually means that this is for some // reason being used for pure arithmetic, and not based on some addressing // computation. We don't have instructions that compute pointers with any // addressing modes, so treat them as having no offset like flat // instructions. return isLegalFlatAddressingMode(AM); - - default: + } else { llvm_unreachable("unhandled address space"); } } @@ -702,8 +697,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT, return false; } - if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS || - AddrSpace == AMDGPUAS::REGION_ADDRESS) { + if (AddrSpace == AMDGPUASI.LOCAL_ADDRESS || + AddrSpace == AMDGPUASI.REGION_ADDRESS) { // ds_read/write_b64 require 8-byte alignment, but we can do a 4 byte // aligned, 8 byte access in a single operation using ds_read2/write2_b32 // with adjacent offsets. @@ -718,8 +713,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT, // will access scratch. If we had access to the IR function, then we // could determine if any private memory was used in the function. 
if (!Subtarget->hasUnalignedScratchAccess() && - (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS || - AddrSpace == AMDGPUAS::FLAT_ADDRESS)) { + (AddrSpace == AMDGPUASI.PRIVATE_ADDRESS || + AddrSpace == AMDGPUASI.FLAT_ADDRESS)) { return false; } @@ -727,7 +722,7 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT, // If we have an uniform constant load, it still requires using a slow // buffer instruction if unaligned. if (IsFast) { - *IsFast = (AddrSpace == AMDGPUAS::CONSTANT_ADDRESS) ? + *IsFast = (AddrSpace == AMDGPUASI.CONSTANT_ADDRESS) ? (Align % 4 == 0) : true; } @@ -767,15 +762,16 @@ EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, return MVT::Other; } -static bool isFlatGlobalAddrSpace(unsigned AS) { - return AS == AMDGPUAS::GLOBAL_ADDRESS || - AS == AMDGPUAS::FLAT_ADDRESS || - AS == AMDGPUAS::CONSTANT_ADDRESS; +static bool isFlatGlobalAddrSpace(unsigned AS, AMDGPUAS AMDGPUASI) { + return AS == AMDGPUASI.GLOBAL_ADDRESS || + AS == AMDGPUASI.FLAT_ADDRESS || + AS == AMDGPUASI.CONSTANT_ADDRESS; } bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { - return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS); + return isFlatGlobalAddrSpace(SrcAS, AMDGPUASI) && + isFlatGlobalAddrSpace(DestAS, AMDGPUASI); } bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const { @@ -789,7 +785,7 @@ bool SITargetLowering::isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { // Flat -> private/local is a simple truncate. // Flat -> global is no-op - if (SrcAS == AMDGPUAS::FLAT_ADDRESS) + if (SrcAS == AMDGPUASI.FLAT_ADDRESS) return true; return isNoopAddrSpaceCast(SrcAS, DestAS); @@ -850,7 +846,7 @@ SDValue SITargetLowering::LowerParameterPtr(SelectionDAG &DAG, unsigned InputPtrReg = TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR); MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); - MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS); + MVT PtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS); SDValue BasePtr = DAG.getCopyFromReg(Chain, SL, MRI.getLiveInVirtReg(InputPtrReg), PtrVT); return DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr, @@ -863,7 +859,7 @@ SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, const ISD::InputArg *Arg) const { const DataLayout &DL = DAG.getDataLayout(); Type *Ty = MemVT.getTypeForEVT(*DAG.getContext()); - PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS); + PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS); MachinePointerInfo PtrInfo(UndefValue::get(PtrTy)); unsigned Align = DL.getABITypeAlignment(Ty); @@ -1073,7 +1069,7 @@ SDValue SITargetLowering::LowerFormalArguments( auto *ParamTy = dyn_cast(FType->getParamType(Ins[i].getOrigArgIndex())); if (Subtarget->getGeneration() == SISubtarget::SOUTHERN_ISLANDS && - ParamTy && ParamTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { + ParamTy && ParamTy->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS) { // On SI local pointers are just offsets into LDS, so they are always // less than 16-bits. On CI and newer they could potentially be // real pointers, so we can't guarantee their size. 
@@ -2206,13 +2202,13 @@ void SITargetLowering::createDebuggerPrologueStackObjects( bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const { const Triple &TT = getTargetMachine().getTargetTriple(); - return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && + return GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS && AMDGPU::shouldEmitConstantsToTextSection(TT); } bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const { - return (GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS || - GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) && + return (GV->getType()->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS || + GV->getType()->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) && !shouldEmitFixup(GV) && !getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV); } @@ -2351,7 +2347,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, SelectionDAG &DAG) const { if (Subtarget->hasApertureRegs()) { // Read from Aperture Registers directly. - unsigned RegNo = (AS == AMDGPUAS::LOCAL_ADDRESS) ? AMDGPU::SRC_SHARED_BASE : + unsigned RegNo = (AS == AMDGPUASI.LOCAL_ADDRESS) ? AMDGPU::SRC_SHARED_BASE : AMDGPU::SRC_PRIVATE_BASE; return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, RegNo, MVT::i32); } @@ -2367,7 +2363,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, // Offset into amd_queue_t for group_segment_aperture_base_hi / // private_segment_aperture_base_hi. - uint32_t StructOffset = (AS == AMDGPUAS::LOCAL_ADDRESS) ? 0x40 : 0x44; + uint32_t StructOffset = (AS == AMDGPUASI.LOCAL_ADDRESS) ? 0x40 : 0x44; SDValue Ptr = DAG.getNode(ISD::ADD, SL, MVT::i64, QueuePtr, DAG.getConstant(StructOffset, SL, MVT::i64)); @@ -2376,7 +2372,7 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS, // TODO: We should use the value from the IR intrinsic call, but it might not // be available and how do we get it? 
Value *V = UndefValue::get(PointerType::get(Type::getInt8Ty(*DAG.getContext()), - AMDGPUAS::CONSTANT_ADDRESS)); + AMDGPUASI.CONSTANT_ADDRESS)); MachinePointerInfo PtrInfo(V, StructOffset); return DAG.getLoad(MVT::i32, SL, QueuePtr.getValue(1), Ptr, PtrInfo, @@ -2397,9 +2393,11 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op, static_cast(getTargetMachine()); // flat -> local/private - if (ASC->getSrcAddressSpace() == AMDGPUAS::FLAT_ADDRESS) { + if (ASC->getSrcAddressSpace() == AMDGPUASI.FLAT_ADDRESS) { unsigned DestAS = ASC->getDestAddressSpace(); - if (DestAS == AMDGPUAS::LOCAL_ADDRESS || DestAS == AMDGPUAS::PRIVATE_ADDRESS) { + + if (DestAS == AMDGPUASI.LOCAL_ADDRESS || + DestAS == AMDGPUASI.PRIVATE_ADDRESS) { unsigned NullVal = TM.getNullPointerValue(DestAS); SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32); SDValue NonNull = DAG.getSetCC(SL, MVT::i1, Src, FlatNullPtr, ISD::SETNE); @@ -2411,9 +2409,11 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op, } // local/private -> flat - if (ASC->getDestAddressSpace() == AMDGPUAS::FLAT_ADDRESS) { + if (ASC->getDestAddressSpace() == AMDGPUASI.FLAT_ADDRESS) { unsigned SrcAS = ASC->getSrcAddressSpace(); - if (SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS) { + + if (SrcAS == AMDGPUASI.LOCAL_ADDRESS || + SrcAS == AMDGPUASI.PRIVATE_ADDRESS) { unsigned NullVal = TM.getNullPointerValue(SrcAS); SDValue SegmentNullPtr = DAG.getConstant(NullVal, SL, MVT::i32); @@ -2513,8 +2513,8 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, bool SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // We can fold offsets for anything that doesn't require a GOT relocation. - return (GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS || - GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) && + return (GA->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS || + GA->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) && !shouldEmitGOTReloc(GA->getGlobal()); } @@ -2565,8 +2565,8 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI, SelectionDAG &DAG) const { GlobalAddressSDNode *GSD = cast(Op); - if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS && - GSD->getAddressSpace() != AMDGPUAS::GLOBAL_ADDRESS) + if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS && + GSD->getAddressSpace() != AMDGPUASI.GLOBAL_ADDRESS) return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG); SDLoc DL(GSD); @@ -2583,7 +2583,7 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI, SIInstrInfo::MO_GOTPCREL32); Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext()); - PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS); + PointerType *PtrTy = PointerType::get(Ty, AMDGPUASI.CONSTANT_ADDRESS); const DataLayout &DataLayout = DAG.getDataLayout(); unsigned Align = DataLayout.getABITypeAlignment(PtrTy); // FIXME: Use a PseudoSourceValue once those can be assigned an address space. @@ -3229,21 +3229,20 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SIMachineFunctionInfo *MFI = MF.getInfo(); // If there is a possibilty that flat instruction access scratch memory // then we need to use the same legalization rules we use for private. - if (AS == AMDGPUAS::FLAT_ADDRESS) + if (AS == AMDGPUASI.FLAT_ADDRESS) AS = MFI->hasFlatScratchInit() ? 
- AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS; + AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS; unsigned NumElements = MemVT.getVectorNumElements(); - switch (AS) { - case AMDGPUAS::CONSTANT_ADDRESS: + if (AS == AMDGPUASI.CONSTANT_ADDRESS) { if (isMemOpUniform(Load)) return SDValue(); // Non-uniform loads will be selected to MUBUF instructions, so they // have the same legalization requirements as global and private // loads. // - LLVM_FALLTHROUGH; - case AMDGPUAS::GLOBAL_ADDRESS: + } + if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS) { if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) && isMemOpHasNoClobberedMemOperand(Load)) return SDValue(); @@ -3251,13 +3250,15 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { // have the same legalization requirements as global and private // loads. // - LLVM_FALLTHROUGH; - case AMDGPUAS::FLAT_ADDRESS: + } + if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS || + AS == AMDGPUASI.FLAT_ADDRESS) { if (NumElements > 4) return SplitVectorLoad(Op, DAG); // v4 loads are supported for private and global memory. return SDValue(); - case AMDGPUAS::PRIVATE_ADDRESS: + } + if (AS == AMDGPUASI.PRIVATE_ADDRESS) { // Depending on the setting of the private_element_size field in the // resource descriptor, we can only make private accesses up to a certain // size. @@ -3276,7 +3277,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { default: llvm_unreachable("unsupported private_element_size"); } - case AMDGPUAS::LOCAL_ADDRESS: + } else if (AS == AMDGPUASI.LOCAL_ADDRESS) { if (NumElements > 2) return SplitVectorLoad(Op, DAG); @@ -3285,9 +3286,8 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { // If properly aligned, if we split we might be able to use ds_read_b64. return SplitVectorLoad(Op, DAG); - default: - return SDValue(); } + return SDValue(); } SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { @@ -3656,18 +3656,17 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { SIMachineFunctionInfo *MFI = MF.getInfo(); // If there is a possibilty that flat instruction access scratch memory // then we need to use the same legalization rules we use for private. - if (AS == AMDGPUAS::FLAT_ADDRESS) + if (AS == AMDGPUASI.FLAT_ADDRESS) AS = MFI->hasFlatScratchInit() ? - AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS; + AMDGPUASI.PRIVATE_ADDRESS : AMDGPUASI.GLOBAL_ADDRESS; unsigned NumElements = VT.getVectorNumElements(); - switch (AS) { - case AMDGPUAS::GLOBAL_ADDRESS: - case AMDGPUAS::FLAT_ADDRESS: + if (AS == AMDGPUASI.GLOBAL_ADDRESS || + AS == AMDGPUASI.FLAT_ADDRESS) { if (NumElements > 4) return SplitVectorStore(Op, DAG); return SDValue(); - case AMDGPUAS::PRIVATE_ADDRESS: { + } else if (AS == AMDGPUASI.PRIVATE_ADDRESS) { switch (Subtarget->getMaxPrivateElementSize()) { case 4: return scalarizeVectorStore(Store, DAG); @@ -3682,8 +3681,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { default: llvm_unreachable("unsupported private_element_size"); } - } - case AMDGPUAS::LOCAL_ADDRESS: { + } else if (AS == AMDGPUASI.LOCAL_ADDRESS) { if (NumElements > 2) return SplitVectorStore(Op, DAG); @@ -3692,8 +3690,7 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { // If properly aligned, if we split we might be able to use ds_write_b64. 
return SplitVectorStore(Op, DAG); - } - default: + } else { llvm_unreachable("unhandled address space"); } } @@ -3724,7 +3721,7 @@ SDValue SITargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) co unsigned AS = AtomicNode->getAddressSpace(); // No custom lowering required for local address space - if (!isFlatGlobalAddrSpace(AS)) + if (!isFlatGlobalAddrSpace(AS, AMDGPUASI)) return Op; // Non-local address space requires custom lowering for atomic compare @@ -3781,26 +3778,26 @@ SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N, /// the immediate offsets of a memory instruction for the given address space. static bool canFoldOffset(unsigned OffsetSize, unsigned AS, const SISubtarget &STI) { - switch (AS) { - case AMDGPUAS::GLOBAL_ADDRESS: + auto AMDGPUASI = STI.getAMDGPUAS(); + if (AS == AMDGPUASI.GLOBAL_ADDRESS) { // MUBUF instructions a 12-bit offset in bytes. return isUInt<12>(OffsetSize); - case AMDGPUAS::CONSTANT_ADDRESS: + } + if (AS == AMDGPUASI.CONSTANT_ADDRESS) { // SMRD instructions have an 8-bit offset in dwords on SI and // a 20-bit offset in bytes on VI. if (STI.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) return isUInt<20>(OffsetSize); else return (OffsetSize % 4 == 0) && isUInt<8>(OffsetSize / 4); - case AMDGPUAS::LOCAL_ADDRESS: - case AMDGPUAS::REGION_ADDRESS: + } + if (AS == AMDGPUASI.LOCAL_ADDRESS || + AS == AMDGPUASI.REGION_ADDRESS) { // The single offset versions have a 16-bit offset in bytes. return isUInt<16>(OffsetSize); - case AMDGPUAS::PRIVATE_ADDRESS: - // Indirect register addressing does not use any offsets. - default: - return false; } + // Indirect register addressing does not use any offsets. + return false; } // (shl (add x, c1), c2) -> add (shl x, c2), (shl c1, c2) @@ -3858,7 +3855,7 @@ SDValue SITargetLowering::performMemSDNodeCombine(MemSDNode *N, // TODO: We could also do this for multiplies. 
unsigned AS = N->getAddressSpace(); - if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUAS::PRIVATE_ADDRESS) { + if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUASI.PRIVATE_ADDRESS) { SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), AS, DCI); if (NewPtr) { SmallVector NewOps(N->op_begin(), N->op_end()); diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index e2e0895f899..bbd8de2e7d1 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -3747,7 +3747,7 @@ unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI, return AMDGPU::NoRegister; assert(!MI.memoperands_empty() && - (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS); + (*MI.memoperands_begin())->getAddrSpace() == AMDGPUASI.PRIVATE_ADDRESS); FrameIndex = Addr->getIndex(); return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg(); @@ -3854,7 +3854,7 @@ bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const { return true; for (const MachineMemOperand *MMO : MI.memoperands()) { - if (MMO->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS) + if (MMO->getAddrSpace() == AMDGPUASI.FLAT_ADDRESS) return true; } return false; diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index 84319153b01..561feb98d59 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -107,7 +107,7 @@ def SIld_local : SDNode <"ISD::LOAD", SDTLoad, >; def si_ld_local : PatFrag <(ops node:$ptr), (SIld_local node:$ptr), [{ - return cast(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; + return cast(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS; }]>; def si_load_local : PatFrag <(ops node:$ptr), (si_ld_local node:$ptr), [{ @@ -144,7 +144,7 @@ def SIst_local : SDNode <"ISD::STORE", SDTStore, def si_st_local : PatFrag < (ops node:$val, node:$ptr), (SIst_local node:$val, node:$ptr), [{ - return cast(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; + return cast(N)->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS; }]>; def si_store_local : PatFrag < diff --git a/lib/Target/AMDGPU/SMInstructions.td b/lib/Target/AMDGPU/SMInstructions.td index 5dfae3f8f3f..5b840a14dbc 100644 --- a/lib/Target/AMDGPU/SMInstructions.td +++ b/lib/Target/AMDGPU/SMInstructions.td @@ -226,9 +226,9 @@ def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime> def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{ auto Ld = cast(N); return Ld->getAlignment() >= 4 && - ((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS && + ((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS && static_cast(getTargetLowering())->isMemOpUniform(N)) || - (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && + (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS && static_cast(getTargetLowering())->isMemOpUniform(N) && static_cast(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N))); }]>; diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index de0fda4be6f..6b9a81976c6 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCRegisterInfo.h" @@ -354,16 +355,16 @@ MCSection *getHSARodataReadonlyAgentSection(MCContext 
&Ctx) { ELF::SHF_AMDGPU_HSA_AGENT); } -bool isGroupSegment(const GlobalValue *GV) { - return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; +bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) { + return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS; } -bool isGlobalSegment(const GlobalValue *GV) { - return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; +bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) { + return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS; } -bool isReadOnlySegment(const GlobalValue *GV) { - return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS; +bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) { + return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS; } bool shouldEmitConstantsToTextSection(const Triple &TT) { @@ -736,6 +737,60 @@ bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) { return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) : isUInt<20>(EncodedOffset); } - } // end namespace AMDGPU + } // end namespace llvm + +const unsigned AMDGPUAS::MAX_COMMON_ADDRESS; +const unsigned AMDGPUAS::GLOBAL_ADDRESS; +const unsigned AMDGPUAS::LOCAL_ADDRESS; +const unsigned AMDGPUAS::PARAM_D_ADDRESS; +const unsigned AMDGPUAS::PARAM_I_ADDRESS; +const unsigned AMDGPUAS::CONSTANT_BUFFER_0; +const unsigned AMDGPUAS::CONSTANT_BUFFER_1; +const unsigned AMDGPUAS::CONSTANT_BUFFER_2; +const unsigned AMDGPUAS::CONSTANT_BUFFER_3; +const unsigned AMDGPUAS::CONSTANT_BUFFER_4; +const unsigned AMDGPUAS::CONSTANT_BUFFER_5; +const unsigned AMDGPUAS::CONSTANT_BUFFER_6; +const unsigned AMDGPUAS::CONSTANT_BUFFER_7; +const unsigned AMDGPUAS::CONSTANT_BUFFER_8; +const unsigned AMDGPUAS::CONSTANT_BUFFER_9; +const unsigned AMDGPUAS::CONSTANT_BUFFER_10; +const unsigned AMDGPUAS::CONSTANT_BUFFER_11; +const unsigned AMDGPUAS::CONSTANT_BUFFER_12; +const unsigned AMDGPUAS::CONSTANT_BUFFER_13; +const unsigned AMDGPUAS::CONSTANT_BUFFER_14; +const unsigned AMDGPUAS::CONSTANT_BUFFER_15; +const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE; + +namespace llvm { +namespace AMDGPU { + +AMDGPUAS getAMDGPUAS(Triple T) { + auto Env = T.getEnvironmentName(); + AMDGPUAS AS; + if (Env == "amdgiz" || Env == "amdgizcl") { + AS.FLAT_ADDRESS = 0; + AS.CONSTANT_ADDRESS = 4; + AS.PRIVATE_ADDRESS = 5; + AS.REGION_ADDRESS = 2; + } + else { + AS.FLAT_ADDRESS = 4; + AS.CONSTANT_ADDRESS = 2; + AS.PRIVATE_ADDRESS = 0; + AS.REGION_ADDRESS = 5; + } + return AS; +} + +AMDGPUAS getAMDGPUAS(const TargetMachine &M) { + return getAMDGPUAS(M.getTargetTriple()); +} + +AMDGPUAS getAMDGPUAS(const Module &M) { + return getAMDGPUAS(Triple(M.getTargetTriple())); +} +} // namespace AMDGPU +} // namespace llvm diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 96171562ebe..0ce90284d67 100644 --- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -10,6 +10,7 @@ #ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H #define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUBASEINFO_H +#include "AMDGPU.h" #include "AMDKernelCodeT.h" #include "SIDefines.h" #include "llvm/ADT/StringRef.h" @@ -160,9 +161,9 @@ MCSection *getHSADataGlobalProgramSection(MCContext &Ctx); MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx); -bool isGroupSegment(const GlobalValue *GV); -bool isGlobalSegment(const GlobalValue *GV); -bool isReadOnlySegment(const GlobalValue *GV); +bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS); +bool isGlobalSegment(const GlobalValue *GV, 
AMDGPUAS AS); +bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS); /// \returns True if constants should be emitted to .text section for given /// target triple \p TT, false otherwise.
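
As a rough illustration of the intended usage (not part of this patch), a pass can build the mapping once at the start of its run* function and query it afterwards. The sketch below assumes only what the change itself introduces, namely the AMDGPUAS value struct and the llvm::AMDGPU::getAMDGPUAS overloads defined in AMDGPUBaseInfo.cpp above, plus standard LLVM headers; the pass name AMDGPUASExamplePass is a placeholder and pass registration boilerplate is omitted.

// Minimal sketch, not part of this patch: resolve the address space mapping
// once per run and compare against it. AMDGPUASExamplePass is a placeholder
// name, not an existing pass.
#include "AMDGPU.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"

using namespace llvm;

namespace {
class AMDGPUASExamplePass : public FunctionPass {
  AMDGPUAS AMDGPUASI; // cheap value struct, filled in at the start of each run
public:
  static char ID;
  AMDGPUASExamplePass() : FunctionPass(ID) {}

  bool runOnFunction(Function &F) override {
    // Triple-dependent values (flat, constant, private, region) are taken
    // from the module's target triple environment.
    AMDGPUASI = AMDGPU::getAMDGPUAS(*F.getParent());

    for (BasicBlock &BB : F)
      for (Instruction &I : BB)
        if (auto *LI = dyn_cast<LoadInst>(&I)) {
          unsigned AS = LI->getPointerAddressSpace();
          // Triple-dependent values are plain members of the instance...
          if (AS == AMDGPUASI.FLAT_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS)
            continue;
          // ...while triple-independent ones remain compile-time constants.
          if (AS == AMDGPUAS::LOCAL_ADDRESS)
            continue;
        }
    return false; // query only, nothing is modified
  }
};
} // end anonymous namespace

char AMDGPUASExamplePass::ID = 0;

Per getAMDGPUAS above, the amdgiz and amdgizcl triple environments select flat = 0, region = 2, constant = 4 and private = 5, while every other environment keeps private = 0, constant = 2, flat = 4 and region = 5; the remaining address spaces are static compile-time constants of the struct.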