From: Valery Pykhtin
Date: Fri, 7 Jun 2019 12:16:46 +0000 (+0000)
Subject: [AMDGPU] Constrain the AMDGPU inliner on maximum number of basic blocks in a caller...
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=9c7e6b20488250c0ce9928ae694e8dc4785454a1;p=llvm

[AMDGPU] Constrain the AMDGPU inliner on maximum number of basic blocks in a caller function (compile time performance)

Differential revision: https://reviews.llvm.org/D62917

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@362789 91177308-0d34-0410-b5e6-96231b3b80d8
---
Reviewer notes (free-form text between "---" and the first "diff --git" is
not part of the change itself):

* Adds the hidden option -amdgpu-inline-max-bb (cl::init(300)). In
  AMDGPUInliner::getInlineCost a call site is rejected with
  "max number of bb exceeded" when Caller->size() + Callee->size() - 1 is
  greater than the option value. The limit is consulted only when the
  computed InlineCost converts to true and is not isAlways(), and only when
  the option value is non-zero.
* The new test has a one-block caller and a three-block callee
  (1 + 3 - 1 = 3), so the call is kept with a limit of 2 and inlined with a
  limit of 3.
* NOTE(review): angle-bracketed text was missing from the copy this patch was
  recovered from (author address, template arguments). The template arguments
  cl::opt<unsigned>, cl::opt<size_t> and cast<CallBase> below were restored
  from how the values are used in the surrounding code - confirm against the
  upstream revision before applying. Indentation of the hunk lines was
  reconstructed as well.

diff --git a/lib/Target/AMDGPU/AMDGPUInline.cpp b/lib/Target/AMDGPU/AMDGPUInline.cpp
index 72388a195de..ba9fdf2dc60 100644
--- a/lib/Target/AMDGPU/AMDGPUInline.cpp
+++ b/lib/Target/AMDGPU/AMDGPUInline.cpp
@@ -49,6 +49,12 @@ static cl::opt<unsigned>
 ArgAllocaCutoff("amdgpu-inline-arg-alloca-cutoff", cl::Hidden, cl::init(256),
                 cl::desc("Maximum alloca size to use for inline cost"));
 
+// Inliner constraint to achieve reasonable compilation time
+static cl::opt<size_t>
+MaxBB("amdgpu-inline-max-bb", cl::Hidden, cl::init(300),
+      cl::desc("Maximum BB number allowed in a function after inlining"
+               " (compile time constraint)"));
+
 namespace {
 
 class AMDGPUInliner : public LegacyInlinerBase {
@@ -208,7 +214,15 @@ InlineCost AMDGPUInliner::getInlineCost(CallSite CS) {
     return ACT->getAssumptionCache(F);
   };
 
-  return llvm::getInlineCost(cast<CallBase>(*CS.getInstruction()), Callee,
+  auto IC = llvm::getInlineCost(cast<CallBase>(*CS.getInstruction()), Callee,
                              LocalParams, TTI, GetAssumptionCache, None, PSI,
                              RemarksEnabled ? &ORE : nullptr);
+
+  if (IC && !IC.isAlways()) {
+    // Single BB does not increase total BB amount, thus subtract 1
+    size_t Size = Caller->size() + Callee->size() - 1;
+    if (MaxBB && Size > MaxBB)
+      return llvm::InlineCost::getNever("max number of bb exceeded");
+  }
+  return IC;
 }
diff --git a/test/CodeGen/AMDGPU/inline-maxbb.ll b/test/CodeGen/AMDGPU/inline-maxbb.ll
new file mode 100644
index 00000000000..7c082b3b6db
--- /dev/null
+++ b/test/CodeGen/AMDGPU/inline-maxbb.ll
@@ -0,0 +1,33 @@
+; RUN: opt -mtriple=amdgcn-- --amdgpu-inline -S -amdgpu-inline-max-bb=2 %s | FileCheck %s --check-prefix=NOINL
+; RUN: opt -mtriple=amdgcn-- --amdgpu-inline -S -amdgpu-inline-max-bb=3 %s | FileCheck %s --check-prefix=INL
+
+define i32 @callee(i32 %x) {
+entry:
+  %cc = icmp eq i32 %x, 1
+  br i1 %cc, label %ret_res, label %mulx
+
+mulx:
+  %mul1 = mul i32 %x, %x
+  %mul2 = mul i32 %mul1, %x
+  %mul3 = mul i32 %mul1, %mul2
+  %mul4 = mul i32 %mul3, %mul2
+  %mul5 = mul i32 %mul4, %mul3
+  br label %ret_res
+
+ret_res:
+  %r = phi i32 [ %mul5, %mulx ], [ %x, %entry ]
+  ret i32 %r
+}
+
+; INL-LABEL: @caller
+; NOINL-LABEL: @caller
+; INL: mul i32
+; INL-NOT: call i32
+; NOINL-NOT: mul i32
+; NOINL: call i32
+
+define amdgpu_kernel void @caller(i32 %x) {
+  %res = call i32 @callee(i32 %x)
+  store volatile i32 %res, i32 addrspace(1)* undef
+  ret void
+}