#include "CGBlocks.h"
#include "CGDebugInfo.h"
#include "CGObjCRuntime.h"
+#include "CGOpenCLRuntime.h"
#include "CodeGenFunction.h"
#include "CodeGenModule.h"
#include "ConstantEmitter.h"
-#include "clang/CodeGen/ConstantInitBuilder.h"
+#include "TargetInfo.h"
#include "clang/AST/DeclObjC.h"
+#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
static void initializeForBlockHeader(CodeGenModule &CGM, CGBlockInfo &info,
SmallVectorImpl<llvm::Type*> &elementTypes) {
- // The header is basically 'struct { void *; int; int; void *; void *; }'.
- // Assert that that struct is packed.
- assert(CGM.getIntSize() <= CGM.getPointerSize());
- assert(CGM.getIntAlign() <= CGM.getPointerAlign());
- assert((2 * CGM.getIntSize()).isMultipleOf(CGM.getPointerAlign()));
-
- info.BlockAlign = CGM.getPointerAlign();
- info.BlockSize = 3 * CGM.getPointerSize() + 2 * CGM.getIntSize();
assert(elementTypes.empty());
- elementTypes.push_back(CGM.VoidPtrTy);
- elementTypes.push_back(CGM.IntTy);
- elementTypes.push_back(CGM.IntTy);
- elementTypes.push_back(CGM.VoidPtrTy);
- elementTypes.push_back(CGM.getBlockDescriptorType());
+ if (CGM.getLangOpts().OpenCL) {
+ // The header is basically 'struct { int; int; generic void *;
+ // custom_fields; }'. Assert that the struct is packed.
+ auto GenericAS =
+ CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic);
+ auto GenPtrAlign =
+ CharUnits::fromQuantity(CGM.getTarget().getPointerAlign(GenericAS) / 8);
+ auto GenPtrSize =
+ CharUnits::fromQuantity(CGM.getTarget().getPointerWidth(GenericAS) / 8);
+ assert(CGM.getIntSize() <= GenPtrSize);
+ assert(CGM.getIntAlign() <= GenPtrAlign);
+ assert((2 * CGM.getIntSize()).isMultipleOf(GenPtrAlign));
+ elementTypes.push_back(CGM.IntTy); /* total size */
+ elementTypes.push_back(CGM.IntTy); /* align */
+ elementTypes.push_back(
+ CGM.getOpenCLRuntime()
+ .getGenericVoidPointerType()); /* invoke function */
+ unsigned Offset =
+ 2 * CGM.getIntSize().getQuantity() + GenPtrSize.getQuantity();
+ unsigned BlockAlign = GenPtrAlign.getQuantity();
+ if (auto *Helper =
+ CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
+ for (auto I : Helper->getCustomFieldTypes()) /* custom fields */ {
+ // TargetOpenCLBlockHelper needs to make sure the struct is packed.
+ // If necessary, add padding fields to the custom fields.
+ unsigned Align = CGM.getDataLayout().getABITypeAlignment(I);
+ if (BlockAlign < Align)
+ BlockAlign = Align;
+ assert(Offset % Align == 0);
+ Offset += CGM.getDataLayout().getTypeAllocSize(I);
+ elementTypes.push_back(I);
+ }
+ }
+ info.BlockAlign = CharUnits::fromQuantity(BlockAlign);
+ info.BlockSize = CharUnits::fromQuantity(Offset);
+ } else {
+ // The header is basically 'struct { void *; int; int; void *; void *; }'.
+ // Assert that that struct is packed.
+ assert(CGM.getIntSize() <= CGM.getPointerSize());
+ assert(CGM.getIntAlign() <= CGM.getPointerAlign());
+ assert((2 * CGM.getIntSize()).isMultipleOf(CGM.getPointerAlign()));
+ info.BlockAlign = CGM.getPointerAlign();
+ info.BlockSize = 3 * CGM.getPointerSize() + 2 * CGM.getIntSize();
+ elementTypes.push_back(CGM.VoidPtrTy);
+ elementTypes.push_back(CGM.IntTy);
+ elementTypes.push_back(CGM.IntTy);
+ elementTypes.push_back(CGM.VoidPtrTy);
+ elementTypes.push_back(CGM.getBlockDescriptorType());
+ }
}
static QualType getCaptureFieldType(const CodeGenFunction &CGF,
SmallVector<llvm::Type*, 8> elementTypes;
initializeForBlockHeader(CGM, info, elementTypes);
-
- if (!block->hasCaptures()) {
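+ // A block with no captures can still need a local (non-global) literal if a
+ // target custom field has to be computed at run time.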
+ bool hasNonConstantCustomFields = false;
+ if (auto *OpenCLHelper =
+ CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper())
+ hasNonConstantCustomFields =
+ !OpenCLHelper->areAllCustomFieldValuesConstant(info);
+ if (!block->hasCaptures() && !hasNonConstantCustomFields) {
info.StructureType =
llvm::StructType::get(CGM.getLLVMContext(), elementTypes, true);
info.CanBeGlobal = true;
}
llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) {
+ bool IsOpenCL = CGM.getContext().getLangOpts().OpenCL;
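+ // In OpenCL the invoke pointer is an i8* in the generic address space, whose
+ // width may differ from the default pointer width, so size it from the target.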
+ auto GenVoidPtrTy =
+ IsOpenCL ? CGM.getOpenCLRuntime().getGenericVoidPointerType() : VoidPtrTy;
+ unsigned GenVoidPtrAddr = CGM.getContext().getTargetAddressSpace(
+ IsOpenCL ? LangAS::opencl_generic : LangAS::Default);
+ auto GenVoidPtrSize = CharUnits::fromQuantity(
+ CGM.getTarget().getPointerWidth(GenVoidPtrAddr) / 8);
// Using the computed layout, generate the actual block function.
bool isLambdaConv = blockInfo.getBlockDecl()->isConversionFromLambda();
- llvm::Constant *blockFn
- = CodeGenFunction(CGM, true).GenerateBlockFunction(CurGD, blockInfo,
- LocalDeclMap,
- isLambdaConv,
- blockInfo.CanBeGlobal);
- blockFn = llvm::ConstantExpr::getBitCast(blockFn, VoidPtrTy);
+ llvm::Constant *blockFn = CodeGenFunction(CGM, true).GenerateBlockFunction(
+ CurGD, blockInfo, LocalDeclMap, isLambdaConv, blockInfo.CanBeGlobal);
+ blockFn = llvm::ConstantExpr::getPointerCast(blockFn, GenVoidPtrTy);
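+ // Note: getPointerCast, unlike getBitCast, inserts an addrspacecast when the
+ // generic address space differs from the function's address space.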
// If there is nothing to capture, we can emit this as a global block.
if (blockInfo.CanBeGlobal)
// Otherwise, we have to emit this as a local block.
- llvm::Constant *isa =
- (!CGM.getContext().getLangOpts().OpenCL)
- ? CGM.getNSConcreteStackBlock()
- : CGM.getNullPointer(VoidPtrPtrTy,
- CGM.getContext().getPointerType(
- QualType(CGM.getContext().VoidPtrTy)));
- isa = llvm::ConstantExpr::getBitCast(isa, VoidPtrTy);
-
- // Build the block descriptor.
- llvm::Constant *descriptor = buildBlockDescriptor(CGM, blockInfo);
-
Address blockAddr = blockInfo.LocalAddress;
assert(blockAddr.isValid() && "block has no address!");
- // Compute the initial on-stack block flags.
- BlockFlags flags = BLOCK_HAS_SIGNATURE;
- if (blockInfo.HasCapturedVariableLayout) flags |= BLOCK_HAS_EXTENDED_LAYOUT;
- if (blockInfo.NeedsCopyDispose) flags |= BLOCK_HAS_COPY_DISPOSE;
- if (blockInfo.HasCXXObject) flags |= BLOCK_HAS_CXX_OBJ;
- if (blockInfo.UsesStret) flags |= BLOCK_USE_STRET;
+ llvm::Constant *isa;
+ llvm::Constant *descriptor;
+ BlockFlags flags;
+ if (!IsOpenCL) {
+ isa = llvm::ConstantExpr::getBitCast(CGM.getNSConcreteStackBlock(),
+ VoidPtrTy);
+
+ // Build the block descriptor.
+ descriptor = buildBlockDescriptor(CGM, blockInfo);
+
+ // Compute the initial on-stack block flags.
+ flags = BLOCK_HAS_SIGNATURE;
+ if (blockInfo.HasCapturedVariableLayout)
+ flags |= BLOCK_HAS_EXTENDED_LAYOUT;
+ if (blockInfo.NeedsCopyDispose)
+ flags |= BLOCK_HAS_COPY_DISPOSE;
+ if (blockInfo.HasCXXObject)
+ flags |= BLOCK_HAS_CXX_OBJ;
+ if (blockInfo.UsesStret)
+ flags |= BLOCK_USE_STRET;
+ }
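+ // For OpenCL blocks there is no isa, flags, reserved or descriptor field; the
+ // header carries only the size, alignment, invoke pointer and any
+ // target-specific custom fields.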
auto projectField =
[&](unsigned index, CharUnits offset, const Twine &name) -> Address {
index++;
};
- addHeaderField(isa, getPointerSize(), "block.isa");
- addHeaderField(llvm::ConstantInt::get(IntTy, flags.getBitMask()),
- getIntSize(), "block.flags");
- addHeaderField(llvm::ConstantInt::get(IntTy, 0),
- getIntSize(), "block.reserved");
- addHeaderField(blockFn, getPointerSize(), "block.invoke");
- addHeaderField(descriptor, getPointerSize(), "block.descriptor");
+ if (!IsOpenCL) {
+ addHeaderField(isa, getPointerSize(), "block.isa");
+ addHeaderField(llvm::ConstantInt::get(IntTy, flags.getBitMask()),
+ getIntSize(), "block.flags");
+ addHeaderField(llvm::ConstantInt::get(IntTy, 0), getIntSize(),
+ "block.reserved");
+ } else {
+ addHeaderField(
+ llvm::ConstantInt::get(IntTy, blockInfo.BlockSize.getQuantity()),
+ getIntSize(), "block.size");
+ addHeaderField(
+ llvm::ConstantInt::get(IntTy, blockInfo.BlockAlign.getQuantity()),
+ getIntSize(), "block.align");
+ }
+ addHeaderField(blockFn, GenVoidPtrSize, "block.invoke");
+ if (!IsOpenCL)
+ addHeaderField(descriptor, getPointerSize(), "block.descriptor");
+ else if (auto *Helper =
+ CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
+ for (auto I : Helper->getCustomFieldValues(*this, blockInfo)) {
+ addHeaderField(
+ I.first,
+ CharUnits::fromQuantity(
+ CGM.getDataLayout().getTypeAllocSize(I.first->getType())),
+ I.second);
+ }
+ }
}
// Finally, capture all the values into the block.
llvm::Type *BlockDescPtrTy = getBlockDescriptorType();
- // struct __block_literal_generic {
- // void *__isa;
- // int __flags;
- // int __reserved;
- // void (*__invoke)(void *);
- // struct __block_descriptor *__descriptor;
- // };
- GenericBlockLiteralType =
- llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy,
- IntTy, IntTy, VoidPtrTy, BlockDescPtrTy);
+ if (getLangOpts().OpenCL) {
+ // struct __opencl_block_literal_generic {
+ // int __size;
+ // int __align;
+ // __generic void *__invoke;
+ // /* custom fields */
+ // };
+ SmallVector<llvm::Type *, 8> StructFields(
+ {IntTy, IntTy, getOpenCLRuntime().getGenericVoidPointerType()});
+ if (auto *Helper = getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
+ for (auto I : Helper->getCustomFieldTypes())
+ StructFields.push_back(I);
+ }
+ GenericBlockLiteralType = llvm::StructType::create(
+ StructFields, "struct.__opencl_block_literal_generic");
+ } else {
+ // struct __block_literal_generic {
+ // void *__isa;
+ // int __flags;
+ // int __reserved;
+ // void (*__invoke)(void *);
+ // struct __block_descriptor *__descriptor;
+ // };
+ GenericBlockLiteralType =
+ llvm::StructType::create("struct.__block_literal_generic", VoidPtrTy,
+ IntTy, IntTy, VoidPtrTy, BlockDescPtrTy);
+ }
return GenericBlockLiteralType;
}
-RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
+RValue CodeGenFunction::EmitBlockCallExpr(const CallExpr *E,
ReturnValueSlot ReturnValue) {
const BlockPointerType *BPT =
E->getCallee()->getType()->getAs<BlockPointerType>();
// Get the function pointer from the literal.
llvm::Value *FuncPtr =
- Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr, 3);
-
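+ // __invoke is field 2 of the OpenCL generic block literal (after __size and
+ // __align), but field 3 of the ObjC-style literal.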
+ Builder.CreateStructGEP(CGM.getGenericBlockLiteralType(), BlockPtr,
+ CGM.getLangOpts().OpenCL ? 2 : 3);
// Add the block literal.
CallArgList Args;
QualType VoidPtrQualTy = getContext().VoidPtrTy;
llvm::Type *GenericVoidPtrTy = VoidPtrTy;
if (getLangOpts().OpenCL) {
- GenericVoidPtrTy = Builder.getInt8PtrTy(
- getContext().getTargetAddressSpace(LangAS::opencl_generic));
+ GenericVoidPtrTy = CGM.getOpenCLRuntime().getGenericVoidPointerType();
VoidPtrQualTy =
getContext().getPointerType(getContext().getAddrSpaceQualType(
getContext().VoidTy, LangAS::opencl_generic));
llvm::Type *BlockFTy = CGM.getTypes().GetFunctionType(FnInfo);
llvm::Type *BlockFTyPtr = llvm::PointerType::getUnqual(BlockFTy);
- Func = Builder.CreateBitCast(Func, BlockFTyPtr);
+ Func = Builder.CreatePointerCast(Func, BlockFTyPtr);
// Prepare the callee.
CGCallee Callee(CGCalleeInfo(), Func);
ConstantInitBuilder builder(CGM);
auto fields = builder.beginStruct();
- // isa
- fields.add((!CGM.getContext().getLangOpts().OpenCL)
- ? CGM.getNSConcreteGlobalBlock()
- : CGM.getNullPointer(CGM.VoidPtrPtrTy,
- CGM.getContext().getPointerType(QualType(
- CGM.getContext().VoidPtrTy))));
+ bool IsOpenCL = CGM.getLangOpts().OpenCL;
+ if (!IsOpenCL) {
+ // isa
+ fields.add(CGM.getNSConcreteGlobalBlock());
+
+ // __flags
+ BlockFlags flags = BLOCK_IS_GLOBAL | BLOCK_HAS_SIGNATURE;
+ if (blockInfo.UsesStret)
+ flags |= BLOCK_USE_STRET;
- // __flags
- BlockFlags flags = BLOCK_IS_GLOBAL | BLOCK_HAS_SIGNATURE;
- if (blockInfo.UsesStret) flags |= BLOCK_USE_STRET;
-
- fields.addInt(CGM.IntTy, flags.getBitMask());
+ fields.addInt(CGM.IntTy, flags.getBitMask());
- // Reserved
- fields.addInt(CGM.IntTy, 0);
+ // Reserved
+ fields.addInt(CGM.IntTy, 0);
+ } else {
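+ // __size and __align replace the ObjC isa/flags/reserved header for OpenCL.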
+ fields.addInt(CGM.IntTy, blockInfo.BlockSize.getQuantity());
+ fields.addInt(CGM.IntTy, blockInfo.BlockAlign.getQuantity());
+ }
// Function
fields.add(blockFn);
- // Descriptor
- fields.add(buildBlockDescriptor(CGM, blockInfo));
+ if (!IsOpenCL) {
+ // Descriptor
+ fields.add(buildBlockDescriptor(CGM, blockInfo));
+ } else if (auto *Helper =
+ CGM.getTargetCodeGenInfo().getTargetOpenCLBlockHelper()) {
+ for (auto I : Helper->getCustomFieldValues(CGM, blockInfo)) {
+ fields.add(I);
+ }
+ }
unsigned AddrSpace = 0;
if (CGM.getContext().getLangOpts().OpenCL)
fnLLVMType, llvm::GlobalValue::InternalLinkage, name, &CGM.getModule());
CGM.SetInternalFunctionAttributes(blockDecl, fn, fnInfo);
- if (BuildGlobalBlock)
+ if (BuildGlobalBlock) {
+ auto GenVoidPtrTy = getContext().getLangOpts().OpenCL
+ ? CGM.getOpenCLRuntime().getGenericVoidPointerType()
+ : VoidPtrTy;
buildGlobalBlock(CGM, blockInfo,
- llvm::ConstantExpr::getBitCast(fn, VoidPtrTy));
+ llvm::ConstantExpr::getPointerCast(fn, GenVoidPtrTy));
+ }
// Begin generating the function.
StartFunction(blockDecl, fnType->getReturnType(), fn, fnInfo, args,
.getQuantity();
return llvm::ConstantInt::get(Int32Ty, TypeSize, false);
}
+
+llvm::PointerType *CGOpenCLRuntime::getGenericVoidPointerType() {
+ assert(CGM.getLangOpts().OpenCL);
+ return llvm::IntegerType::getInt8PtrTy(
+ CGM.getLLVMContext(),
+ CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
+}
// \brief Returns a value which indicates the alignment in bytes of the pipe
// element.
virtual llvm::Value *getPipeElemAlign(const Expr *PipeArg);
+
+ /// \return __generic void* type.
+ llvm::PointerType *getGenericVoidPointerType();
};
}
class ABIInfo;
class CallArgList;
class CodeGenFunction;
+class CGBlockInfo;
class CGFunctionInfo;
/// TargetCodeGenInfo - This class organizes various target-specific
/// Get the syncscope used in LLVM IR.
virtual llvm::SyncScope::ID getLLVMSyncScopeID(SyncScope S,
llvm::LLVMContext &C) const;
+
+  /// Interface class for filling custom fields of a block literal for OpenCL.
+ class TargetOpenCLBlockHelper {
+ public:
+ typedef std::pair<llvm::Value *, StringRef> ValueTy;
+ TargetOpenCLBlockHelper() {}
+ virtual ~TargetOpenCLBlockHelper() {}
+ /// Get the custom field types for OpenCL blocks.
+ virtual llvm::SmallVector<llvm::Type *, 1> getCustomFieldTypes() = 0;
+ /// Get the custom field values for OpenCL blocks.
+ virtual llvm::SmallVector<ValueTy, 1>
+ getCustomFieldValues(CodeGenFunction &CGF, const CGBlockInfo &Info) = 0;
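+    /// Return true if all custom field values for the given block can be
+    /// emitted as LLVM constants, i.e. the block literal may become a global.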
+ virtual bool areAllCustomFieldValuesConstant(const CGBlockInfo &Info) = 0;
+ /// Get the custom field values for OpenCL blocks if all values are LLVM
+ /// constants.
+ virtual llvm::SmallVector<llvm::Constant *, 1>
+ getCustomFieldValues(CodeGenModule &CGM, const CGBlockInfo &Info) = 0;
+ };
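+  /// Return a helper that appends target-specific fields to every OpenCL
+  /// block literal, or null if the target does not need any. A minimal,
+  /// purely illustrative sketch of an implementation (the names and the
+  /// extra i32 field below are hypothetical, not an in-tree target):
+  ///
+  /// \code
+  ///   class MyOpenCLBlockHelper final : public TargetOpenCLBlockHelper {
+  ///     llvm::LLVMContext &Ctx; // hypothetical context member
+  ///   public:
+  ///     MyOpenCLBlockHelper(llvm::LLVMContext &Ctx) : Ctx(Ctx) {}
+  ///     llvm::SmallVector<llvm::Type *, 1> getCustomFieldTypes() override {
+  ///       return {llvm::Type::getInt32Ty(Ctx)}; // one extra i32 field
+  ///     }
+  ///     llvm::SmallVector<ValueTy, 1>
+  ///     getCustomFieldValues(CodeGenFunction &CGF,
+  ///                          const CGBlockInfo &Info) override {
+  ///       return {{CGF.Builder.getInt32(0), "block.my_field"}};
+  ///     }
+  ///     bool areAllCustomFieldValuesConstant(const CGBlockInfo &) override {
+  ///       return true;
+  ///     }
+  ///     llvm::SmallVector<llvm::Constant *, 1>
+  ///     getCustomFieldValues(CodeGenModule &CGM,
+  ///                          const CGBlockInfo &Info) override {
+  ///       return {llvm::ConstantInt::get(CGM.Int32Ty, 0)};
+  ///     }
+  ///   };
+  /// \endcode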
+ virtual TargetOpenCLBlockHelper *getTargetOpenCLBlockHelper() const {
+ return nullptr;
+ }
};
} // namespace CodeGen
+++ /dev/null
-// RUN: %clang_cc1 -O0 %s -ffake-address-space-map -emit-llvm -o - -fblocks -triple x86_64-unknown-unknown | FileCheck %s
-// This used to crash due to trying to generate a bitcase from a cstring
-// in the constant address space to i8* in AS0.
-
-void dummy(float (^const op)(float)) {
-}
-
-// CHECK: i8 addrspace(2)* getelementptr inbounds ([9 x i8], [9 x i8] addrspace(2)* @.str, i32 0, i32 0)
-
-kernel void test_block()
-{
- float (^const X)(float) = ^(float x) {
- return x + 42.0f;
- };
- dummy(X);
-}
-
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -triple spir-unknown-unknown | FileCheck -check-prefix=GENERIC -check-prefix=COMMON %s
-// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -triple amdgcn-amd-amdhsa-opencl | FileCheck -check-prefix=AMD -check-prefix=COMMON %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -triple spir-unknown-unknown | FileCheck -check-prefixes=COMMON,SPIR %s
+// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -o - -O0 -triple amdgcn-amd-amdhsa-opencl | FileCheck -check-prefixes=COMMON,AMD %s
-// Checking for null instead of @__NSConcreteGlobalBlock symbol
-// COMMON: @__block_literal_global = internal addrspace(1) constant { i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } { i8** null
+// COMMON: %struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* }
+// SPIR: @__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @block_A_block_invoke to i8*) to i8 addrspace(4)*) }
+// AMD: @__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 16, i32 8, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @block_A_block_invoke to i8*) to i8 addrspace(4)*) }
+// COMMON-NOT: .str
+
+// COMMON-LABEL: define internal {{.*}}void @block_A_block_invoke(i8 addrspace(4)* %.block_descriptor, i8 addrspace(3)* %a)
void (^block_A)(local void *) = ^(local void *a) {
return;
};
+// COMMON-LABEL: define {{.*}}void @foo()
void foo(){
int i;
-// Checking for null instead of @_NSConcreteStackBlock symbol
-// COMMON: store i8* null, i8** %block.isa
+ // COMMON-NOT: %block.isa
+ // COMMON-NOT: %block.flags
+ // COMMON-NOT: %block.reserved
+ // COMMON-NOT: %block.descriptor
+ // COMMON: %[[block_size:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %block, i32 0, i32 0
+ // SPIR: store i32 16, i32* %[[block_size]]
+ // AMD: store i32 20, i32* %[[block_size]]
+ // COMMON: %[[block_align:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %block, i32 0, i32 1
+ // SPIR: store i32 4, i32* %[[block_align]]
+ // AMD: store i32 8, i32* %[[block_align]]
+ // COMMON: %[[block_invoke:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block:.*]], i32 0, i32 2
+ // COMMON: store i8 addrspace(4)* addrspacecast (i8* bitcast (i32 (i8 addrspace(4)*)* @__foo_block_invoke to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %[[block_invoke]]
+ // COMMON: %[[block_captured:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]], i32 0, i32 3
+ // COMMON: %[[i_value:.*]] = load i32, i32* %i
+ // COMMON: store i32 %[[i_value]], i32* %[[block_captured]],
+ // COMMON: %[[blk_ptr:.*]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 }>* %[[block]] to i32 ()*
+ // COMMON: %[[blk_gen_ptr:.*]] = addrspacecast i32 ()* %[[blk_ptr]] to i32 () addrspace(4)*
+ // COMMON: store i32 () addrspace(4)* %[[blk_gen_ptr]], i32 () addrspace(4)** %[[block_B:.*]],
+ // COMMON: %[[blk_gen_ptr:.*]] = load i32 () addrspace(4)*, i32 () addrspace(4)** %[[block_B]]
+ // COMMON: %[[block_literal:.*]] = bitcast i32 () addrspace(4)* %[[blk_gen_ptr]] to %struct.__opencl_block_literal_generic addrspace(4)*
+ // COMMON: %[[invoke_addr:.*]] = getelementptr inbounds %struct.__opencl_block_literal_generic, %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]], i32 0, i32 2
+ // COMMON: %[[blk_gen_ptr:.*]] = bitcast %struct.__opencl_block_literal_generic addrspace(4)* %[[block_literal]] to i8 addrspace(4)*
+ // COMMON: %[[invoke_func_ptr:.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %[[invoke_addr]]
+ // COMMON: %[[invoke_func:.*]] = addrspacecast i8 addrspace(4)* %[[invoke_func_ptr]] to i32 (i8 addrspace(4)*)*
+ // COMMON: call {{.*}}i32 %[[invoke_func]](i8 addrspace(4)* %[[blk_gen_ptr]])
+
int (^ block_B)(void) = ^{
return i;
};
+ block_B();
}
+
+// COMMON-LABEL: define internal {{.*}}i32 @__foo_block_invoke(i8 addrspace(4)* %.block_descriptor)
+// COMMON: %[[block:.*]] = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 }> addrspace(4)*
+// COMMON: %[[block_capture_addr:.*]] = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 }>, <{ i32, i32, i8 addrspace(4)*, i32 }> addrspace(4)* %[[block]], i32 0, i32 3
+// COMMON: %[[block_capture:.*]] = load i32, i32 addrspace(4)* %[[block_capture_addr]]
typedef struct {int a;} ndrange_t;
// N.B. The check here only exists to set BL_GLOBAL
-// COMMON: @block_G = addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_GLOBAL:@__block_literal_global(\.[0-9]+)?]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*)
+// COMMON: @block_G = addrspace(1) constant void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL:@__block_literal_global(\.[0-9]+)?]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*)
const bl_t block_G = (bl_t) ^ (local void *a) {};
kernel void device_side_enqueue(global int *a, global int *b, int i) {
// COMMON: [[NDR:%[a-z0-9]+]] = alloca %struct.ndrange_t, align 4
// COMMON: [[DEF_Q:%[0-9]+]] = load %opencl.queue_t{{.*}}*, %opencl.queue_t{{.*}}** %default_queue
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
- // COMMON: [[BL:%[0-9]+]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor addrspace(2)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block to void ()*
+ // B32: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block to void ()*
+ // B64: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32 addrspace(1)*, i32 }>* %block to void ()*
// COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
- // COMMON: call i32 @__enqueue_kernel_basic(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{(.[0-9]+)?}}, i8 addrspace(4)* [[BL_I8]])
+ // COMMON: call i32 @__enqueue_kernel_basic(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* byval [[NDR]]{{([0-9]+)?}}, i8 addrspace(4)* [[BL_I8]])
enqueue_kernel(default_queue, flags, ndrange,
^(void) {
a[i] = b[i];
// COMMON: [[FLAGS:%[0-9]+]] = load i32, i32* %flags
// COMMON: [[WAIT_EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %event_wait_list to %opencl.clk_event_t{{.*}}* addrspace(4)*
// COMMON: [[EVNT:%[0-9]+]] = addrspacecast %opencl.clk_event_t{{.*}}** %clk_event to %opencl.clk_event_t{{.*}}* addrspace(4)*
- // COMMON: [[BL:%[0-9]+]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor addrspace(2)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()*
+ // COMMON: [[BL:%[0-9]+]] = bitcast <{ i32, i32, i8 addrspace(4)*, i32{{.*}}, i32{{.*}}, i32{{.*}} }>* %block3 to void ()*
// COMMON: [[BL_I8:%[0-9]+]] = addrspacecast void ()* [[BL]] to i8 addrspace(4)*
// COMMON: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], i8 addrspace(4)* [[BL_I8]])
enqueue_kernel(default_queue, flags, ndrange, 2, &event_wait_list, &clk_event,
// B32: %[[TMP:.*]] = alloca [1 x i32]
// B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0
// B32: store i32 256, i32* %[[TMP1]], align 4
- // B32: call i32 @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{(.[0-9]+)?}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %[[TMP1]])
+ // B32: call i32 @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %[[TMP1]])
// B64: %[[TMP:.*]] = alloca [1 x i64]
// B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0
// B64: store i64 256, i64* %[[TMP1]], align 8
- // B64: call i32 @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{(.[0-9]+)?}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i64* %[[TMP1]])
+ // B64: call i32 @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i64* %[[TMP1]])
enqueue_kernel(default_queue, flags, ndrange,
^(local void *p) {
return;
// B32: %[[TMP:.*]] = alloca [1 x i32]
// B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0
// B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4
- // B32: call i32 @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{(.[0-9]+)?}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %[[TMP1]])
+ // B32: call i32 @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %[[TMP1]])
// B64: %[[TMP:.*]] = alloca [1 x i64]
// B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0
// B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8
- // B64: call i32 @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{(.[0-9]+)?}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i64* %[[TMP1]])
+ // B64: call i32 @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i64* %[[TMP1]])
enqueue_kernel(default_queue, flags, ndrange,
^(local void *p) {
return;
// B32: %[[TMP:.*]] = alloca [1 x i32]
// B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0
// B32: store i32 256, i32* %[[TMP1]], align 4
- // B32: call i32 @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %[[TMP1]])
+ // B32: call i32 @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %[[TMP1]])
// B64: %[[TMP:.*]] = alloca [1 x i64]
// B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0
// B64: store i64 256, i64* %[[TMP1]], align 8
- // B64: call i32 @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i64* %[[TMP1]])
+ // B64: call i32 @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}} [[WAIT_EVNT]], %opencl.clk_event_t{{.*}} [[EVNT]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i64* %[[TMP1]])
enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event,
^(local void *p) {
return;
// B32: %[[TMP:.*]] = alloca [1 x i32]
// B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0
// B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4
- // B32: call i32 @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %[[TMP1]])
+ // B32: call i32 @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %[[TMP1]])
// B64: %[[TMP:.*]] = alloca [1 x i64]
// B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0
// B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8
- // B64: call i32 @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i64* %[[TMP1]])
+ // B64: call i32 @__enqueue_kernel_events_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t{{.*}}* addrspace(4)* [[WAIT_EVNT]], %opencl.clk_event_t{{.*}}* addrspace(4)* [[EVNT]], i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i64* %[[TMP1]])
enqueue_kernel(default_queue, flags, ndrange, 2, event_wait_list2, &clk_event,
^(local void *p) {
return;
// B32: %[[TMP:.*]] = alloca [1 x i32]
// B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0
// B32: store i32 %{{.*}}, i32* %[[TMP1]], align 4
- // B32: call i32 @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{(.[0-9]+)?}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %[[TMP1]])
+ // B32: call i32 @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %[[TMP1]])
// B64: %[[TMP:.*]] = alloca [1 x i64]
// B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0
// B64: store i64 %{{.*}}, i64* %[[TMP1]], align 8
- // B64: call i32 @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{(.[0-9]+)?}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i64* %[[TMP1]])
+ // B64: call i32 @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i64* %[[TMP1]])
enqueue_kernel(default_queue, flags, ndrange,
^(local void *p) {
return;
// B32: store i32 2, i32* %[[TMP2]], align 4
// B32: %[[TMP3:.*]] = getelementptr [3 x i32], [3 x i32]* %[[TMP]], i32 0, i32 2
// B32: store i32 4, i32* %[[TMP3]], align 4
- // B32: call i32 @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{(.[0-9]+)?}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %[[TMP1]])
+ // B32: call i32 @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %[[TMP1]])
// B64: %[[TMP:.*]] = alloca [3 x i64]
// B64: %[[TMP1:.*]] = getelementptr [3 x i64], [3 x i64]* %[[TMP]], i32 0, i32 0
// B64: store i64 1, i64* %[[TMP1]], align 8
// B64: store i64 2, i64* %[[TMP2]], align 8
// B64: %[[TMP3:.*]] = getelementptr [3 x i64], [3 x i64]* %[[TMP]], i32 0, i32 2
// B64: store i64 4, i64* %[[TMP3]], align 8
- // B64: call i32 @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{(.[0-9]+)?}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i64* %[[TMP1]])
+ // B64: call i32 @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i64* %[[TMP1]])
enqueue_kernel(default_queue, flags, ndrange,
^(local void *p1, local void *p2, local void *p3) {
return;
// B32: %[[TMP:.*]] = alloca [1 x i32]
// B32: %[[TMP1:.*]] = getelementptr [1 x i32], [1 x i32]* %[[TMP]], i32 0, i32 0
// B32: store i32 0, i32* %[[TMP1]], align 4
- // B32: call i32 @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{(.[0-9]+)?}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %[[TMP1]])
+ // B32: call i32 @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %[[TMP1]])
// B64: %[[TMP:.*]] = alloca [1 x i64]
// B64: %[[TMP1:.*]] = getelementptr [1 x i64], [1 x i64]* %[[TMP]], i32 0, i32 0
// B64: store i64 4294967296, i64* %[[TMP1]], align 8
- // B64: call i32 @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{(.[0-9]+)?}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i64* %[[TMP1]])
+ // B64: call i32 @__enqueue_kernel_vaargs(%opencl.queue_t{{.*}}* [[DEF_Q]], i32 [[FLAGS]], %struct.ndrange_t* [[NDR]]{{([0-9]+)?}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global{{(.[0-9]+)?}} to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i64* %[[TMP1]])
enqueue_kernel(default_queue, flags, ndrange,
^(local void *p) {
return;
// The full type of these expressions are long (and repeated elsewhere), so we
// capture it as part of the regex for convenience and clarity.
- // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_A:@__block_literal_global(\.[0-9]+)?]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A
+ // COMMON: store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_A:@__block_literal_global(\.[0-9]+)?]] to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %block_A
void (^const block_A)(void) = ^{
return;
};
- // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_B:@__block_literal_global(\.[0-9]+)?]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B
+ // COMMON: store void (i8 addrspace(3)*) addrspace(4)* addrspacecast (void (i8 addrspace(3)*) addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_B:@__block_literal_global(\.[0-9]+)?]] to void (i8 addrspace(3)*) addrspace(1)*) to void (i8 addrspace(3)*) addrspace(4)*), void (i8 addrspace(3)*) addrspace(4)** %block_B
void (^const block_B)(local void *) = ^(local void *a) {
return;
};
- // COMMON: call i32 @__get_kernel_work_group_size_impl(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_A]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+ // COMMON: call i32 @__get_kernel_work_group_size_impl(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_A]] to i8 addrspace(1)*) to i8 addrspace(4)*))
unsigned size = get_kernel_work_group_size(block_A);
- // COMMON: call i32 @__get_kernel_work_group_size_impl(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_B]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+ // COMMON: call i32 @__get_kernel_work_group_size_impl(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_B]] to i8 addrspace(1)*) to i8 addrspace(4)*))
size = get_kernel_work_group_size(block_B);
- // COMMON: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_A]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+ // COMMON: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_A]] to i8 addrspace(1)*) to i8 addrspace(4)*))
size = get_kernel_preferred_work_group_size_multiple(block_A);
- // COMMON: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
+ // COMMON: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
size = get_kernel_preferred_work_group_size_multiple(block_G);
- // COMMON: call i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(%struct.ndrange_t* {{.*}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* {{.*}} to i8 addrspace(1)*) to i8 addrspace(4)*))
+ // COMMON: call i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(%struct.ndrange_t* {{.*}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* {{.*}} to i8 addrspace(1)*) to i8 addrspace(4)*))
size = get_kernel_max_sub_group_size_for_ndrange(ndrange, ^(){});
- // COMMON: call i32 @__get_kernel_sub_group_count_for_ndrange_impl(%struct.ndrange_t* {{.*}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* {{.*}} to i8 addrspace(1)*) to i8 addrspace(4)*))
+ // COMMON: call i32 @__get_kernel_sub_group_count_for_ndrange_impl(%struct.ndrange_t* {{.*}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* {{.*}} to i8 addrspace(1)*) to i8 addrspace(4)*))
size = get_kernel_sub_group_count_for_ndrange(ndrange, ^(){});
}