def err_omp_wrong_cancel_region : Error<
"one of 'for', 'parallel', 'sections' or 'taskgroup' is expected">;
def err_omp_parent_cancel_region_nowait : Error<
- "parent region for 'omp %select{cancellation point/cancel}0' construct cannot be nowait">;
+ "parent region for 'omp %select{cancellation point|cancel}0' construct cannot be nowait">;
def err_omp_parent_cancel_region_ordered : Error<
- "parent region for 'omp %select{cancellation point/cancel}0' construct cannot be ordered">;
+ "parent region for 'omp %select{cancellation point|cancel}0' construct cannot be ordered">;
def err_omp_reduction_wrong_type : Error<"reduction type cannot be %select{qualified with 'const', 'volatile' or 'restrict'|a function|a reference|an array}0 type">;
def err_omp_wrong_var_in_declare_reduction : Error<"only %select{'omp_priv' or 'omp_orig'|'omp_in' or 'omp_out'}0 variables are allowed in %select{initializer|combiner}0 expression">;
def err_omp_declare_reduction_redefinition : Error<"redefinition of user-defined reduction for type %0">;
// arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
// *arg_types);
OMPRTL__tgt_target,
+ // Call to int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
+ // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
+ // *arg_types);
+ OMPRTL__tgt_target_nowait,
// Call to int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
// *arg_types, int32_t num_teams, int32_t thread_limit);
OMPRTL__tgt_target_teams,
+ // Call to int32_t __tgt_target_teams_nowait(int64_t device_id, void
+ // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
+ // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
+ OMPRTL__tgt_target_teams_nowait,
// Call to void __tgt_register_lib(__tgt_bin_desc *desc);
OMPRTL__tgt_register_lib,
// Call to void __tgt_unregister_lib(__tgt_bin_desc *desc);
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target");
break;
}
+ case OMPRTL__tgt_target_nowait: {
+ // Build int32_t __tgt_target_nowait(int64_t device_id, void *host_ptr,
+ // int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
+ // int64_t *arg_types);
+ llvm::Type *TypeParams[] = {CGM.Int64Ty,
+ CGM.VoidPtrTy,
+ CGM.Int32Ty,
+ CGM.VoidPtrPtrTy,
+ CGM.VoidPtrPtrTy,
+ CGM.SizeTy->getPointerTo(),
+ CGM.Int64Ty->getPointerTo()};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_nowait");
+ break;
+ }
case OMPRTL__tgt_target_teams: {
// Build int32_t __tgt_target_teams(int64_t device_id, void *host_ptr,
// int32_t arg_num, void** args_base, void **args, size_t *arg_sizes,
RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams");
break;
}
+ case OMPRTL__tgt_target_teams_nowait: {
+ // Build int32_t __tgt_target_teams_nowait(int64_t device_id, void
+ // *host_ptr, int32_t arg_num, void** args_base, void **args, size_t
+ // *arg_sizes, int64_t *arg_types, int32_t num_teams, int32_t thread_limit);
+ llvm::Type *TypeParams[] = {CGM.Int64Ty,
+ CGM.VoidPtrTy,
+ CGM.Int32Ty,
+ CGM.VoidPtrPtrTy,
+ CGM.VoidPtrPtrTy,
+ CGM.SizeTy->getPointerTo(),
+ CGM.Int64Ty->getPointerTo(),
+ CGM.Int32Ty,
+ CGM.Int32Ty};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__tgt_target_teams_nowait");
+ break;
+ }
case OMPRTL__tgt_register_lib: {
// Build void __tgt_register_lib(__tgt_bin_desc *desc);
QualType ParamTy =
auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D);
auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D);
+ bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
// The target region is an outlined function launched by the runtime
// via calls __tgt_target() or __tgt_target_teams().
//
Info.MapTypesArray, NumTeams,
NumThreads};
Return = CGF.EmitRuntimeCall(
- RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs);
+ RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_teams_nowait
+ : OMPRTL__tgt_target_teams),
+ OffloadingArgs);
} else {
llvm::Value *OffloadingArgs[] = {
DeviceID, OutlinedFnID,
PointerNum, Info.BasePointersArray,
Info.PointersArray, Info.SizesArray,
Info.MapTypesArray};
- Return = CGF.EmitRuntimeCall(RT.createRuntimeFunction(OMPRTL__tgt_target),
- OffloadingArgs);
+ Return = CGF.EmitRuntimeCall(
+ RT.createRuntimeFunction(HasNowait ? OMPRTL__tgt_target_nowait
+ : OMPRTL__tgt_target),
+ OffloadingArgs);
}
// Check the error code and execute the host version if required.
// CHECK-DAG: [[ADD:%.+]] = add nsw i32
// CHECK-DAG: [[DEVICE:%.+]] = sext i32 [[ADD]] to i64
- // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0)
+ // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0)
// CHECK-DAG: [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0
// CHECK-DAG: [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0
// CHECK: call void [[HVT0_:@.+]](i[[SZ]]* [[BP0]], i[[SZ]] [[BP1]])
// CHECK-NEXT: br label %[[END]]
// CHECK: [[END]]
- #pragma omp target device(global + a)
+ #pragma omp target device(global + a) nowait
{
static int local1;
*plocal = global;
double cn[5][n];
TT<long long, char> d;
- // CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 1, i32 0)
+ // CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 1, i32 0)
// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
// CHECK: [[FAIL]]
// CHECK: call void [[HVT0:@.+]]()
// CHECK-NEXT: br label %[[END]]
// CHECK: [[END]]
- #pragma omp target parallel
+ #pragma omp target parallel nowait
{
}
a += 1;
}
- // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT2]], i32 0, i32 0), i32 1, i32 0)
+ // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT2]], i32 0, i32 0), i32 1, i32 0)
// CHECK-DAG: [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
// CHECK-DAG: [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 0
// CHECK-NEXT: br label %[[END]]
// CHECK: [[END]]
int lin = 12;
- #pragma omp target parallel for if(target: 1) linear(lin, a : get_val())
+ #pragma omp target parallel for if(target: 1) linear(lin, a : get_val()) nowait
for (unsigned long long it = 2000; it >= 600; it-=400) {
aa += 1;
}
double cn[5][n];
TT<long long, char> d;
- // CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 1, i32 0)
+ // CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 1, i32 0)
// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
// CHECK: [[FAIL]]
// CHECK: call void [[HVT0:@.+]]()
// CHECK-NEXT: br label %[[END]]
// CHECK: [[END]]
- #pragma omp target parallel for simd
+ #pragma omp target parallel for simd nowait
for (int i = 3; i < 32; i += 5) {
}
double cn[5][n];
TT<long long, char> d;
- // CHECK: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null)
+ // CHECK: [[RET:%.+]] = call i32 @__tgt_target_nowait(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null)
// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0
// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]]
// CHECK: [[FAIL]]
// CHECK: call void [[HVT0:@.+]]()
// CHECK-NEXT: br label %[[END]]
// CHECK: [[END]]
- #pragma omp target simd
+ #pragma omp target simd nowait
for (int i = 3; i < 32; i += 5) {
}
double cn[5][n];
TT<long long, char> d;
- // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i32 {{[^,]+}}, i32 {{[^)]+}})
+ // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i32 {{[^,]+}}, i32 {{[^)]+}})
// CHECK-DAG: [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
// CHECK-DAG: [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]]
// CHECK: call void [[HVT0:@.+]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}})
// CHECK-NEXT: br label %[[END]]
// CHECK: [[END]]
- #pragma omp target teams num_teams(a) thread_limit(a) firstprivate(aa)
+ #pragma omp target teams num_teams(a) thread_limit(a) firstprivate(aa) nowait
{
}
double cn[5][n];
TT<long long, char> d;
- // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i32 {{[^,]+}}, i32 {{[^)]+}})
+ // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i32 {{[^,]+}}, i32 {{[^)]+}})
// CHECK-DAG: [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
// CHECK-DAG: [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]]
// CHECK: call void [[HVT0:@.+]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}})
// CHECK-NEXT: br label %[[END]]
// CHECK: [[END]]
- #pragma omp target teams distribute num_teams(a) thread_limit(a) firstprivate(aa)
+ #pragma omp target teams distribute num_teams(a) thread_limit(a) firstprivate(aa) nowait
for (int i = 0; i < 10; ++i) {
}
double cn[5][n];
TT<long long, char> d;
- // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i32 {{[^,]+}}, i32 {{[^)]+}})
+ // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT]], i32 0, i32 0), i32 {{[^,]+}}, i32 {{[^)]+}})
// CHECK-DAG: [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0
// CHECK-DAG: [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0
// CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]]
// CHECK: call void [[HVT0:@.+]](i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^,]+}}, i[[SZ]] {{[^)]+}})
// CHECK-NEXT: br label %[[END]]
// CHECK: [[END]]
- #pragma omp target teams distribute simd num_teams(a) thread_limit(a) firstprivate(aa) simdlen(16)
+ #pragma omp target teams distribute simd num_teams(a) thread_limit(a) firstprivate(aa) simdlen(16) nowait
for (int i = 0; i < 10; ++i) {
}