]> granicus.if.org Git - clang/commitdiff
[OpenCL] Add missing subgroup builtins
authorJoey Gouly <joey.gouly@gmail.com>
Tue, 1 Aug 2017 13:27:09 +0000 (13:27 +0000)
committerJoey Gouly <joey.gouly@gmail.com>
Tue, 1 Aug 2017 13:27:09 +0000 (13:27 +0000)
This adds get_kernel_max_sub_group_size_for_ndrange and
get_kernel_sub_group_count_for_ndrange.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@309678 91177308-0d34-0410-b5e6-96231b3b80d8

include/clang/Basic/Builtins.def
lib/CodeGen/CGBuiltin.cpp
lib/Sema/SemaChecking.cpp
test/CodeGenOpenCL/cl20-device-side-enqueue.cl
test/SemaOpenCL/cl20-device-side-enqueue.cl

index 1ddb9beaf9133570a93a8f0a9d032cd6999995d1..82242d9571600480ae3d01a313b883d562f702a1 100644 (file)
@@ -1398,8 +1398,10 @@ LANGBUILTIN(get_pipe_max_packets, "Ui.", "tn", OCLC20_LANG)
 // OpenCL v2.0 s6.13.17 - Enqueue kernel functions.
 // Custom builtin check allows to perform special check of passed block arguments.
 LANGBUILTIN(enqueue_kernel, "i.", "tn", OCLC20_LANG)
-LANGBUILTIN(get_kernel_work_group_size, "i.", "tn", OCLC20_LANG)
-LANGBUILTIN(get_kernel_preferred_work_group_size_multiple, "i.", "tn", OCLC20_LANG)
+LANGBUILTIN(get_kernel_work_group_size, "Ui.", "tn", OCLC20_LANG)
+LANGBUILTIN(get_kernel_preferred_work_group_size_multiple, "Ui.", "tn", OCLC20_LANG)
+LANGBUILTIN(get_kernel_max_sub_group_size_for_ndrange, "Ui.", "tn", OCLC20_LANG)
+LANGBUILTIN(get_kernel_sub_group_count_for_ndrange, "Ui.", "tn", OCLC20_LANG)
 
 // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
 LANGBUILTIN(to_global, "v*v*", "tn", OCLC20_LANG)
index 0d10ab58586415138d1638b464fc99196e55cc41..92c0da9845915955c02c22bc7e6823f5756837b8 100644 (file)
@@ -2704,6 +2704,25 @@ RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
             "__get_kernel_preferred_work_group_multiple_impl"),
         Arg));
   }
+  case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
+  case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
+    llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
+        getContext().getTargetAddressSpace(LangAS::opencl_generic));
+    LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
+    llvm::Value *NDRange = NDRangeL.getAddress().getPointer();
+    Value *Block = EmitScalarExpr(E->getArg(1));
+    Block = Builder.CreatePointerCast(Block, GenericVoidPtrTy);
+    const char *Name =
+        BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
+            ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
+            : "__get_kernel_sub_group_count_for_ndrange_impl";
+    return RValue::get(Builder.CreateCall(
+        CGM.CreateRuntimeFunction(
+            llvm::FunctionType::get(
+                IntTy, {NDRange->getType(), GenericVoidPtrTy}, false),
+            Name),
+        {NDRange, Block}));
+  }
   case Builtin::BIprintf:
     if (getTarget().getTriple().isNVPTX())
       return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
index b2f7807ce2dbf3f9e5c650db41b0494e0414110d..81dd36cf67163bbe2530981449ffd632c6a39381 100644 (file)
@@ -308,6 +308,32 @@ static bool checkOpenCLSubgroupExt(Sema &S, CallExpr *Call) {
   return false;
 }
 
+static bool SemaOpenCLBuiltinNDRangeAndBlock(Sema &S, CallExpr *TheCall) {
+  if (checkArgCount(S, TheCall, 2))
+    return true;
+
+  if (checkOpenCLSubgroupExt(S, TheCall))
+    return true;
+
+  // First argument is an ndrange_t type.
+  Expr *NDRangeArg = TheCall->getArg(0);
+  if (NDRangeArg->getType().getAsString() != "ndrange_t") {
+    S.Diag(NDRangeArg->getLocStart(),
+           diag::err_opencl_builtin_expected_type)
+        << TheCall->getDirectCallee() << "'ndrange_t'";
+    return true;
+  }
+
+  Expr *BlockArg = TheCall->getArg(1);
+  if (!isBlockPointer(BlockArg)) {
+    S.Diag(BlockArg->getLocStart(),
+           diag::err_opencl_builtin_expected_type)
+        << TheCall->getDirectCallee() << "block";
+    return true;
+  }
+  return checkOpenCLBlockArgs(S, BlockArg);
+}
+
 /// OpenCL C v2.0, s6.13.17.6 - Check the argument to the
 /// get_kernel_work_group_size
 /// and get_kernel_preferred_work_group_size_multiple builtin functions.
@@ -1109,6 +1135,12 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
     if (SemaOpenCLBuiltinKernelWorkGroupSize(*this, TheCall))
       return ExprError();
     break;
+    break;
+  case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
+  case Builtin::BIget_kernel_sub_group_count_for_ndrange:
+    if (SemaOpenCLBuiltinNDRangeAndBlock(*this, TheCall))
+      return ExprError();
+    break;
   case Builtin::BI__builtin_os_log_format:
   case Builtin::BI__builtin_os_log_format_buffer_size:
     if (SemaBuiltinOSLogFormat(TheCall)) {
index a2a065d37bdd00ecdcd7f912821ce3a896dd530c..567ab76490d9064868fa2fe247949ebb52e235f1 100644 (file)
@@ -140,4 +140,9 @@ kernel void device_side_enqueue(global int *a, global int *b, int i) {
   size = get_kernel_preferred_work_group_size_multiple(block_A);
   // COMMON: call i32 @__get_kernel_preferred_work_group_multiple_impl(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* [[BL_GLOBAL]] to i8 addrspace(1)*) to i8 addrspace(4)*))
   size = get_kernel_preferred_work_group_size_multiple(block_G);
+
+  // COMMON: call i32 @__get_kernel_max_sub_group_size_for_ndrange_impl(%struct.ndrange_t* {{.*}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* {{.*}} to i8 addrspace(1)*) to i8 addrspace(4)*))
+  size = get_kernel_max_sub_group_size_for_ndrange(ndrange, ^(){});
+  // COMMON: call i32 @__get_kernel_sub_group_count_for_ndrange_impl(%struct.ndrange_t* {{.*}}, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i8**, i32, i32, i8*, %struct.__block_descriptor addrspace(2)* } addrspace(1)* {{.*}} to i8 addrspace(1)*) to i8 addrspace(4)*))
+  size = get_kernel_sub_group_count_for_ndrange(ndrange, ^(){});
 }
index 3f6527afeadc92878974c66dfd1fa7e42b369fe0..b4714dc290b2d5ab2810d9f75e4d692b291e1ebb 100644 (file)
@@ -209,3 +209,35 @@ kernel void work_group_size_tests() {
   size = get_kernel_preferred_work_group_size_multiple(1);         // expected-error{{expected block argument}}
   size = get_kernel_preferred_work_group_size_multiple(block_A, 1); // expected-error{{too many arguments to function call, expected 1, have 2}}
 }
+
+#pragma OPENCL EXTENSION cl_khr_subgroups : enable
+// Subgroups enabled: one call with no diagnostic, then a bad ndrange_t and a bad block.
+kernel void foo(global int *buf)
+{
+  ndrange_t n;
+  buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){});
+  buf[0] = get_kernel_max_sub_group_size_for_ndrange(0, ^(){}); // expected-error{{illegal call to 'get_kernel_max_sub_group_size_for_ndrange', expected 'ndrange_t' argument type}}
+  buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, 1); // expected-error{{illegal call to 'get_kernel_max_sub_group_size_for_ndrange', expected block argument type}}
+}
+// Same three cases as foo, for get_kernel_sub_group_count_for_ndrange.
+kernel void bar(global int *buf)
+{
+  ndrange_t n;
+  buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){});
+  buf[0] = get_kernel_sub_group_count_for_ndrange(0, ^(){}); // expected-error{{illegal call to 'get_kernel_sub_group_count_for_ndrange', expected 'ndrange_t' argument type}}
+  buf[0] = get_kernel_sub_group_count_for_ndrange(n, 1); // expected-error{{illegal call to 'get_kernel_sub_group_count_for_ndrange', expected block argument type}}
+}
+
+#pragma OPENCL EXTENSION cl_khr_subgroups : disable
+// Subgroups disabled: an otherwise well-formed call must be diagnosed.
+kernel void foo1(global int *buf)
+{
+  ndrange_t n;
+  buf[0] = get_kernel_max_sub_group_size_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_max_sub_group_size_for_ndrange' requires cl_khr_subgroups extension to be enabled}}
+}
+// Subgroups disabled: same check for get_kernel_sub_group_count_for_ndrange.
+kernel void bar1(global int *buf)
+{
+  ndrange_t n;
+  buf[0] = get_kernel_sub_group_count_for_ndrange(n, ^(){}); // expected-error {{use of declaration 'get_kernel_sub_group_count_for_ndrange' requires cl_khr_subgroups extension to be enabled}}
+}