}
/// Emit a block literal expression in the current function.
-llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr,
- llvm::Function **InvokeF) {
+llvm::Value *CodeGenFunction::EmitBlockLiteral(const BlockExpr *blockExpr) {
// If the block has no captures, we won't have a pre-computed
// layout for it.
if (!blockExpr->getBlockDecl()->hasCaptures()) {
// The block literal is emitted as a global variable, and the block invoke
// function has to be extracted from its initializer.
if (llvm::Constant *Block = CGM.getAddrOfGlobalBlockIfEmitted(blockExpr)) {
- if (InvokeF) {
- auto *GV = cast<llvm::GlobalVariable>(
- cast<llvm::Constant>(Block)->stripPointerCasts());
- auto *BlockInit = cast<llvm::ConstantStruct>(GV->getInitializer());
- *InvokeF = cast<llvm::Function>(
- BlockInit->getAggregateElement(2)->stripPointerCasts());
- }
return Block;
}
CGBlockInfo blockInfo(blockExpr->getBlockDecl(), CurFn->getName());
computeBlockInfo(CGM, this, blockInfo);
blockInfo.BlockExpression = blockExpr;
- return EmitBlockLiteral(blockInfo, InvokeF);
+ return EmitBlockLiteral(blockInfo);
}
// Find the block info for this block and take ownership of it.
blockExpr->getBlockDecl()));
blockInfo->BlockExpression = blockExpr;
- return EmitBlockLiteral(*blockInfo, InvokeF);
+ return EmitBlockLiteral(*blockInfo);
}
-llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo,
- llvm::Function **InvokeF) {
+llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL;
auto GenVoidPtrTy =
IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy;
BlockCGF.SanOpts = SanOpts;
auto *InvokeFn = BlockCGF.GenerateBlockFunction(
CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal);
- if (InvokeF)
- *InvokeF = InvokeFn;
auto *blockFn = llvm::ConstantExpr::getPointerCast(InvokeFn, GenVoidPtrTy);
// If there is nothing to capture, we can emit this as a global block.
llvm::Value *result = Builder.CreatePointerCast(
blockAddr.getPointer(), ConvertType(blockInfo.getBlockExpr()->getType()));
+ if (IsOpenCL) {
+ CGM.getOpenCLRuntime().recordBlockInfo(blockInfo.BlockExpression, InvokeFn,
+ result);
+ }
+
return result;
}
llvm::Constant *Result =
llvm::ConstantExpr::getPointerCast(literal, RequiredType);
CGM.setAddrOfGlobalBlock(blockInfo.BlockExpression, Result);
+ if (CGM.getContext().getLangOpts().OpenCL)
+ CGM.getOpenCLRuntime().recordBlockInfo(
+ blockInfo.BlockExpression,
+ cast<llvm::Function>(blockFn->stripPointerCasts()), Result);
return Result;
}
CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
}
+/// Record emitted llvm invoke function and llvm block literal for the
+/// corresponding block expression.
+void CGOpenCLRuntime::recordBlockInfo(const BlockExpr *E,
+ llvm::Function *InvokeF,
+ llvm::Value *Block) {
+ assert(EnqueuedBlockMap.find(E) == EnqueuedBlockMap.end() &&
+ "Block expression emitted twice");
+ assert(isa<llvm::Function>(InvokeF) && "Invalid invoke function");
+ assert(Block->getType()->isPointerTy() && "Invalid block literal type");
+ EnqueuedBlockMap[E].InvokeFunc = InvokeF;
+ EnqueuedBlockMap[E].BlockArg = Block;
+ EnqueuedBlockMap[E].Kernel = nullptr;
+}
+
CGOpenCLRuntime::EnqueuedBlockInfo
CGOpenCLRuntime::emitOpenCLEnqueuedBlock(CodeGenFunction &CGF, const Expr *E) {
+ CGF.EmitScalarExpr(E);
+
// The block literal may be assigned to a const variable. Chasing down
// to get the block literal.
if (auto DR = dyn_cast<DeclRefExpr>(E)) {
E = cast<VarDecl>(DR->getDecl())->getInit();
}
+ E = E->IgnoreImplicit();
if (auto Cast = dyn_cast<CastExpr>(E)) {
E = Cast->getSubExpr();
}
auto *Block = cast<BlockExpr>(E);
- // The same block literal may be enqueued multiple times. Cache it if
- // possible.
- auto Loc = EnqueuedBlockMap.find(Block);
- if (Loc != EnqueuedBlockMap.end()) {
- return Loc->second;
+ assert(EnqueuedBlockMap.find(Block) != EnqueuedBlockMap.end() &&
+ "Block expression not emitted");
+
+ // Do not emit the block wrapper again if it has been emitted.
+ if (EnqueuedBlockMap[Block].Kernel) {
+ return EnqueuedBlockMap[Block];
}
- // Emit block literal as a common block expression and get the block invoke
- // function.
- llvm::Function *Invoke;
- auto *V = CGF.EmitBlockLiteral(cast<BlockExpr>(Block), &Invoke);
auto *F = CGF.getTargetHooks().createEnqueuedBlockKernel(
- CGF, Invoke, V->stripPointerCasts());
+ CGF, EnqueuedBlockMap[Block].InvokeFunc,
+ EnqueuedBlockMap[Block].BlockArg->stripPointerCasts());
// The common part of the post-processing of the kernel goes here.
F->addFnAttr(llvm::Attribute::NoUnwind);
F->setCallingConv(
CGF.getTypes().ClangCallConvToLLVMCallConv(CallingConv::CC_OpenCLKernel));
- EnqueuedBlockInfo Info{F, V};
- EnqueuedBlockMap[Block] = Info;
- return Info;
+ EnqueuedBlockMap[Block].Kernel = F;
+ return EnqueuedBlockMap[Block];
}
namespace clang {
+class BlockExpr;
class Expr;
class VarDecl;
/// Structure for enqueued block information.
struct EnqueuedBlockInfo {
- llvm::Function *Kernel; /// Enqueued block kernel.
- llvm::Value *BlockArg; /// The first argument to enqueued block kernel.
+ llvm::Function *InvokeFunc; /// Block invoke function.
+ llvm::Function *Kernel; /// Enqueued block kernel.
+ llvm::Value *BlockArg; /// The first argument to enqueued block kernel.
};
/// Maps block expression to block information.
llvm::DenseMap<const Expr *, EnqueuedBlockInfo> EnqueuedBlockMap;
/// \return enqueued block information for enqueued block.
EnqueuedBlockInfo emitOpenCLEnqueuedBlock(CodeGenFunction &CGF,
const Expr *E);
+
+ /// \brief Record invoke function and block literal emitted during normal
+ /// codegen for a block expression. The information is used by
+ /// emitOpenCLEnqueuedBlock to emit wrapper kernel.
+ ///
+ /// \param InvokeF invoke function emitted for the block expression.
+ /// \param Block block literal emitted for the block expression.
+ void recordBlockInfo(const BlockExpr *E, llvm::Function *InvokeF,
+ llvm::Value *Block);
};
}
/// \return an LLVM value which is a pointer to a struct which contains
/// information about the block, including the block invoke function, the
/// captured variables, etc.
- /// \param InvokeF will contain the block invoke function if it is not
- /// nullptr.
- llvm::Value *EmitBlockLiteral(const BlockExpr *,
- llvm::Function **InvokeF = nullptr);
+ llvm::Value *EmitBlockLiteral(const BlockExpr *);
static void destroyBlockInfos(CGBlockInfo *info);
llvm::Function *GenerateBlockFunction(GlobalDecl GD,
LValue EmitOMPSharedLValue(const Expr *E);
private:
- /// Helpers for blocks. Returns invoke function by \p InvokeF if it is not
- /// nullptr. It should be called without \p InvokeF if the caller does not
- /// need invoke function to be returned.
- llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info,
- llvm::Function **InvokeF = nullptr);
+ /// Helpers for blocks.
+ llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info);
/// struct with the values to be passed to the OpenMP loop-related functions
struct OMPLoopArguments {
typedef struct {int a;} ndrange_t;
+void callee(long id, global long *out) {
+ out[id] = id;
+}
+
// CHECK-LABEL: define amdgpu_kernel void @test
kernel void test(global char *a, char b, global long *c, long d) {
queue_t default_queue;
c[0] = d;
((local int*)lp)[0] = 1;
}, 100);
+
+ void (^block)(void) = ^{
+ callee(d, c);
+ };
+
+ enqueue_kernel(default_queue, flags, ndrange, block);
}
// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_kernel(<{ i32, i32, i8*, i8 addrspace(1)*, i8 }>)
// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_3_kernel(<{ i32, i32, i8*, i8 addrspace(1)*, i64 addrspace(1)*, i64, i8 }>, i8 addrspace(3)*)
// CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
+// CHECK-LABEL: define internal amdgpu_kernel void @__test_block_invoke_4_kernel(<{ i32, i32, i8*, i64, i64 addrspace(1)* }>)
+// CHECK-SAME: #[[ATTR]] !kernel_arg_addr_space !{{.*}} !kernel_arg_access_qual !{{.*}} !kernel_arg_type !{{.*}} !kernel_arg_base_type !{{.*}} !kernel_arg_type_qual !{{.*}}
+
// CHECK: attributes #[[ATTR]] = { nounwind "enqueued-block" }
// COMMON: define internal spir_func void [[INV_G]](i8 addrspace(4)* %{{.*}}, i8 addrspace(3)* %{{.*}})
const bl_t block_G = (bl_t) ^ (local void *a) {};
+void callee(int id, __global int *out) {
+ out[id] = id;
+}
+
// COMMON-LABEL: define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %{{.*}}, i32 addrspace(1)* %b, i32 %i)
kernel void device_side_enqueue(global int *a, global int *b, int i) {
// COMMON: %default_queue = alloca %opencl.queue_t*
// COMMON: call spir_func void [[r2]](i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BLG8]] to i8 addrspace(1)*) to i8 addrspace(4)*))
block_A();
+ void (^block_C)(void) = ^{
+ callee(i, a);
+ };
+
+ // Emits block literal on stack and block kernel [[INVLK3]].
+ // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* [[INVL3:@__device_side_enqueue_block_invoke[^ ]*]] to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke
+ // COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
+ // COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
+ // COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* {{.*}} to i8 addrspace(4)*
+ // COMMON-LABEL: call i32 @__enqueue_kernel_basic(
+ // COMMON-SAME: %opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}},
+ // COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVLK3:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
+ // COMMON-SAME: i8 addrspace(4)* [[BL_I8]])
+ enqueue_kernel(default_queue, flags, ndrange, block_C);
+
// Emits global block literal [[BLG9]] and block kernel [[INVGK9]]. [[INVGK9]] calls [[INV9]].
// COMMON: call i32 @__get_kernel_work_group_size_impl(
// COMMON-SAME: i8 addrspace(4)* addrspacecast (i8* bitcast ({{.*}} [[INVGK9:[^ ]+_kernel]] to i8*) to i8 addrspace(4)*),
// COMMON: define internal spir_func void [[INVG8]](i8 addrspace(4)*{{.*}})
// COMMON: define internal spir_func void [[INVG9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)* %{{.*}})
// COMMON: define internal spir_kernel void [[INVGK8]](i8 addrspace(4)*{{.*}})
+// COMMON: define internal spir_kernel void [[INVLK3]](i8 addrspace(4)*{{.*}})
// COMMON: define internal spir_kernel void [[INVGK9]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
// COMMON: define internal spir_kernel void [[INV_G_K]](i8 addrspace(4)*{{.*}}, i8 addrspace(3)*{{.*}})
// COMMON: define internal spir_kernel void [[INVGK10]](i8 addrspace(4)*{{.*}})