From: Eli Bendersky Date: Tue, 15 Apr 2014 16:57:05 +0000 (+0000) Subject: Add support for CUDA __launch_bounds__ attribute to CodeGen. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=57e9d5ea97f046f67c097cf4d19883d9078c4805;p=clang Add support for CUDA __launch_bounds__ attribute to CodeGen. Sema does have a CUDALaunchBoundsAttr, but CodeGen was doing nothing with it. This change translates CUDALaunchBoundsAttr to maxntidx and minctasm metadata, which NVPTX then translates to the correct PTX directives. Patch by Manjunath Kudlur. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@206302 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/TargetInfo.cpp b/lib/CodeGen/TargetInfo.cpp index 64f32097c9..fc51c3b1b0 100644 --- a/lib/CodeGen/TargetInfo.cpp +++ b/lib/CodeGen/TargetInfo.cpp @@ -4770,7 +4770,9 @@ public: void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const override; private: - static void addKernelMetadata(llvm::Function *F); + // Adds a NamedMDNode with F, Name, and Operand as operands, and adds the + // resulting MDNode to the nvvm.annotations MDNode. + static void addNVVMMetadata(llvm::Function *F, StringRef Name, int Operand); }; ABIArgInfo NVPTXABIInfo::classifyReturnType(QualType RetTy) const { @@ -4829,7 +4831,8 @@ SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, // By default, all functions are device functions if (FD->hasAttr()) { // OpenCL __kernel functions get kernel metadata - addKernelMetadata(F); + // Create !{, metadata !"kernel", i32 1} node + addNVVMMetadata(F, "kernel", 1); // And kernel functions are not subject to inlining F->addFnAttr(llvm::Attribute::NoInline); } @@ -4840,28 +4843,43 @@ SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV, // CUDA __global__ functions get a kernel metadata entry. Since // __global__ functions cannot be called from the device, we do not // need to set the noinline attribute. - if (FD->hasAttr()) - addKernelMetadata(F); + if (FD->hasAttr()) { + // Create !{, metadata !"kernel", i32 1} node + addNVVMMetadata(F, "kernel", 1); + } + if (FD->hasAttr()) { + // Create !{, metadata !"maxntidx", i32 } node + addNVVMMetadata(F, "maxntidx", + FD->getAttr()->getMaxThreads()); + // min blocks is a default argument for CUDALaunchBoundsAttr, so getting a + // zero value from getMinBlocks either means it was not specified in + // __launch_bounds__ or the user specified a 0 value. In both cases, we + // don't have to add a PTX directive. + int MinCTASM = FD->getAttr()->getMinBlocks(); + if (MinCTASM > 0) { + // Create !{, metadata !"minctasm", i32 } node + addNVVMMetadata(F, "minctasm", MinCTASM); + } + } } } -void NVPTXTargetCodeGenInfo::addKernelMetadata(llvm::Function *F) { +void NVPTXTargetCodeGenInfo::addNVVMMetadata(llvm::Function *F, StringRef Name, + int Operand) { llvm::Module *M = F->getParent(); llvm::LLVMContext &Ctx = M->getContext(); // Get "nvvm.annotations" metadata node llvm::NamedMDNode *MD = M->getOrInsertNamedMetadata("nvvm.annotations"); - // Create !{, metadata !"kernel", i32 1} node llvm::SmallVector MDVals; MDVals.push_back(F); - MDVals.push_back(llvm::MDString::get(Ctx, "kernel")); - MDVals.push_back(llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), 1)); - + MDVals.push_back(llvm::MDString::get(Ctx, Name)); + MDVals.push_back( + llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx), Operand)); // Append metadata to nvvm.annotations MD->addOperand(llvm::MDNode::get(Ctx, MDVals)); } - } //===----------------------------------------------------------------------===//