//
// Offloading related calls
//
+ // Call to void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
+ // size);
+ OMPRTL__kmpc_push_target_tripcount,
// Call to int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
// arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
// *arg_types);
FnTy, /*Name=*/"__kmpc_task_reduction_get_th_data");
break;
}
+ case OMPRTL__kmpc_push_target_tripcount: {
+ // Build void __kmpc_push_target_tripcount(int64_t device_id, kmp_uint64
+ // size);
+ llvm::Type *TypeParams[] = {CGM.Int64Ty, CGM.Int64Ty};
+ llvm::FunctionType *FnTy =
+ llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
+ RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_push_target_tripcount");
+ break;
+ }
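+ // For reference (editor's sketch; values illustrative): the call emitted
+ // through this runtime function looks like
+ //   call void @__kmpc_push_target_tripcount(i64 -1, i64 %tripcount)
+ // where the device ID -1 denotes the default device.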
case OMPRTL__tgt_target: {
// Build int32_t __tgt_target(int64_t device_id, void *host_ptr, int32_t
// arg_num, void** args_base, void **args, size_t *arg_sizes, int64_t
}
}
+/// Checks if the expression is constant or does not have non-trivial function
+/// calls.
+static bool isTrivial(ASTContext &Ctx, const Expr *E) {
+ // We can skip constant expressions, and expressions with only trivial
+ // calls, provided they have no side effects.
+ return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
+ !E->hasNonTrivialCall(Ctx)) &&
+ !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
+}
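+// Illustrative examples (editor's note, not part of the patch): `n / 128`
+// has no calls and no side effects, so it is trivial even when `n` is not a
+// constant; `++x` has a side effect and `foo()` is a non-trivial call, so
+// neither of those is trivial.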
+
+/// Checks if the \p Body is a \a CompoundStmt and returns its single child
+/// statement iff there is exactly one child that is not evaluatable at
+/// compile time.
+static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body) {
+ if (const auto *C = dyn_cast<CompoundStmt>(Body)) {
+ const Stmt *Child = nullptr;
+ for (const Stmt *S : C->body()) {
+ if (const auto *E = dyn_cast<Expr>(S)) {
+ if (isTrivial(Ctx, E))
+ continue;
+ }
+ // Some of the statements can be ignored.
+ if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
+ isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
+ continue;
+ // Analyze declarations.
+ if (const auto *DS = dyn_cast<DeclStmt>(S)) {
+ if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) {
+ if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
+ isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
+ isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
+ isa<UsingDirectiveDecl>(D) ||
+ isa<OMPDeclareReductionDecl>(D) ||
+ isa<OMPThreadPrivateDecl>(D))
+ return true;
+ const auto *VD = dyn_cast<VarDecl>(D);
+ if (!VD)
+ return false;
+ return VD->isConstexpr() ||
+ ((VD->getType().isTrivialType(Ctx) ||
+ VD->getType()->isReferenceType()) &&
+ (!VD->hasInit() || isTrivial(Ctx, VD->getInit())));
+ }))
+ continue;
+ }
+ // Found a second non-trivial child - there is no single child to return.
+ if (Child)
+ return Body;
+ Child = S;
+ }
+ if (Child)
+ return Child;
+ }
+ return Body;
+}
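+// For example (illustrative): given a captured body
+//   { int tmp = 0; #pragma omp teams distribute parallel for ... }
+// the declaration of `tmp` is skipped as trivial and the directive is
+// returned as the single child; with two non-trivial statements the whole
+// Body is returned instead.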
+
+/// Checks for a distribute directive nested inside \p D and returns it, if
+/// any.
+static const OMPExecutableDirective *
+getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
+ const auto *CS = D.getInnermostCapturedStmt();
+ const auto *Body =
+ CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
+ const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body);
+
+ if (const auto *NestedDir = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
+ switch (D.getDirectiveKind()) {
+ case OMPD_target:
+ if (isOpenMPDistributeDirective(DKind))
+ return NestedDir;
+ if (DKind == OMPD_teams) {
+ Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
+ /*IgnoreCaptured=*/true);
+ if (!Body)
+ return nullptr;
+ ChildStmt = getSingleCompoundChild(Ctx, Body);
+ if (const auto *NND = dyn_cast<OMPExecutableDirective>(ChildStmt)) {
+ DKind = NND->getDirectiveKind();
+ if (isOpenMPDistributeDirective(DKind))
+ return NND;
+ }
+ }
+ return nullptr;
+ case OMPD_target_teams:
+ if (isOpenMPDistributeDirective(DKind))
+ return NestedDir;
+ return nullptr;
+ case OMPD_target_parallel:
+ case OMPD_target_simd:
+ case OMPD_target_parallel_for:
+ case OMPD_target_parallel_for_simd:
+ return nullptr;
+ case OMPD_target_teams_distribute:
+ case OMPD_target_teams_distribute_simd:
+ case OMPD_target_teams_distribute_parallel_for:
+ case OMPD_target_teams_distribute_parallel_for_simd:
+ case OMPD_parallel:
+ case OMPD_for:
+ case OMPD_parallel_for:
+ case OMPD_parallel_sections:
+ case OMPD_for_simd:
+ case OMPD_parallel_for_simd:
+ case OMPD_cancel:
+ case OMPD_cancellation_point:
+ case OMPD_ordered:
+ case OMPD_threadprivate:
+ case OMPD_task:
+ case OMPD_simd:
+ case OMPD_sections:
+ case OMPD_section:
+ case OMPD_single:
+ case OMPD_master:
+ case OMPD_critical:
+ case OMPD_taskyield:
+ case OMPD_barrier:
+ case OMPD_taskwait:
+ case OMPD_taskgroup:
+ case OMPD_atomic:
+ case OMPD_flush:
+ case OMPD_teams:
+ case OMPD_target_data:
+ case OMPD_target_exit_data:
+ case OMPD_target_enter_data:
+ case OMPD_distribute:
+ case OMPD_distribute_simd:
+ case OMPD_distribute_parallel_for:
+ case OMPD_distribute_parallel_for_simd:
+ case OMPD_teams_distribute:
+ case OMPD_teams_distribute_simd:
+ case OMPD_teams_distribute_parallel_for:
+ case OMPD_teams_distribute_parallel_for_simd:
+ case OMPD_target_update:
+ case OMPD_declare_simd:
+ case OMPD_declare_target:
+ case OMPD_end_declare_target:
+ case OMPD_declare_reduction:
+ case OMPD_taskloop:
+ case OMPD_taskloop_simd:
+ case OMPD_requires:
+ case OMPD_unknown:
+ llvm_unreachable("Unexpected directive.");
+ }
+ }
+
+ return nullptr;
+}
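+// For example (illustrative): in
+//   #pragma omp target
+//   { #pragma omp teams
+//     { #pragma omp distribute parallel for ... } }
+// the nested 'distribute parallel for' directive is returned; for a
+// directive such as 'target parallel', which cannot directly contain a
+// distribute loop, nullptr is returned.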
+
+void CGOpenMPRuntime::emitTargetNumIterationsCall(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
+ const llvm::function_ref<llvm::Value *(
+ CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter) {
+ OpenMPDirectiveKind Kind = D.getDirectiveKind();
+ const OMPExecutableDirective *TD = &D;
+ // If D is not itself a combined teams distribute directive, look for a
+ // nested distribute directive.
+ if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
+ TD = getNestedDistributeDirective(CGM.getContext(), D);
+ if (!TD)
+ return;
+ const auto *LD = cast<OMPLoopDirective>(TD);
+ auto &&CodeGen = [LD, &Device, &SizeEmitter, this](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+ llvm::Value *NumIterations = SizeEmitter(CGF, *LD);
+
+ // Emit device ID if any.
+ llvm::Value *DeviceID;
+ if (Device)
+ DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
+ CGF.Int64Ty, /*isSigned=*/true);
+ else
+ DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
+
+ llvm::Value *Args[] = {DeviceID, NumIterations};
+ CGF.EmitRuntimeCall(
+ createRuntimeFunction(OMPRTL__kmpc_push_target_tripcount), Args);
+ };
+ emitInlinedDirective(CGF, OMPD_unknown, CodeGen);
+}
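+// Net effect (editor's sketch; constants taken from the tests below): for
+//   #pragma omp target teams distribute parallel for
+//   for (int i = 0; i < 100; ++i) ...
+// the host code now emits
+//   call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
+// immediately before the matching __tgt_target_teams() call.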
+
void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
llvm::Value *OutlinedFn,
bool IsOffloadEntry,
const RegionCodeGenTy &CodeGen);
+ /// Emit code that pushes the trip count of loops associated with constructs
+ /// 'target teams distribute' and 'target teams distribute parallel for'.
+ /// \param SizeEmitter Emits the int64 value for the number of iterations of
+ /// the associated loop.
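+ /// A typical emitter (an illustrative sketch; see the matching lambda in
+ /// CGStmtOpenMP.cpp) casts the directive's iteration count to int64:
+ /// \code
+ ///   [](CodeGenFunction &CGF, const OMPLoopDirective &D) {
+ ///     llvm::Value *NumIters = CGF.EmitScalarExpr(D.getNumIterations());
+ ///     return CGF.Builder.CreateIntCast(NumIters, CGF.Int64Ty,
+ ///                                      /*IsSigned=*/false);
+ ///   }
+ /// \endcode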
+ virtual void emitTargetNumIterationsCall(
+ CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *Device,
+ const llvm::function_ref<llvm::Value *(
+ CodeGenFunction &CGF, const OMPLoopDirective &D)> &SizeEmitter);
+
/// Emit the target offloading code associated with \a D. The emitted
/// code attempts to offload the execution to the device; in the event of
/// a failure it executes the host version outlined in \a OutlinedFn.
: CGOpenMPRuntimeNVPTX::Generic;
}
-// Checks if the expression is constant or does not have non-trivial function
-// calls.
+/// Checks if the expression is constant or does not have non-trivial function
+/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
// We can skip constant expressions.
// We can skip expressions with trivial calls or simple expressions.
CGM.getOpenMPRuntime().emitTargetOutlinedFunction(S, ParentName, Fn, FnID,
IsOffloadEntry, CodeGen);
OMPLexicalScope Scope(CGF, S, OMPD_task);
+ auto &&SizeEmitter = [](CodeGenFunction &CGF, const OMPLoopDirective &D) {
+ // Name the RAII object so the loop scope stays alive while the iteration
+ // count is emitted (an unnamed temporary would be destroyed immediately).
+ OMPLoopScope LoopScope(CGF, D);
+ // Emit calculation of the iterations count.
+ llvm::Value *NumIterations = CGF.EmitScalarExpr(D.getNumIterations());
+ NumIterations = CGF.Builder.CreateIntCast(NumIterations, CGF.Int64Ty,
+ /*IsSigned=*/false);
+ return NumIterations;
+ };
+ CGM.getOpenMPRuntime().emitTargetNumIterationsCall(CGF, S, Device,
+ SizeEmitter);
CGM.getOpenMPRuntime().emitTargetCall(CGF, S, Fn, FnID, IfCond, Device);
}
// discard capture expressions for te and th
// HCK1: = alloca i32,
// HCK1: = alloca i32,
+ // HCK1: = alloca i32,
+ // HCK1: = alloca i32,
+ // HCK1: = alloca i32,
// HCK1: [[N_CAST:%.+]] = alloca i{{32|64}},
// HCK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
// HCK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
+ // HCK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
// HCK1: [[N_PAR:%.+]] = load{{.+}}, {{.+}} [[N_CAST]],
// HCK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
// HCK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
// CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
void gtid_test() {
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
#pragma omp target teams distribute parallel for
// CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
int main() {
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
// CHECK: call void [[OFFLOADING_FUN_2:@.+]](
// CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
// discard capture expressions for te and th
// HCK1: = alloca i32,
// HCK1: = alloca i32,
+// HCK1: = alloca i32,
+// HCK1: = alloca i32,
+// HCK1: = alloca i32,
// HCK1: [[I_CAST:%.+]] = alloca i{{32|64}},
// HCK1: [[N_CAST:%.+]] = alloca i{{32|64}},
// HCK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
// HCK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
+// HCK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
// HCK1: [[I_PAR:%.+]] = load{{.+}}, {{.+}} [[I_CAST]],
// HCK1: [[N_PAR:%.+]] = load{{.+}}, {{.+}} [[N_CAST]],
// HCK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
// HCK1: call void @[[OFFL2:.+]](i{{64|32}} %{{.+}})
{{{
#pragma omp target teams distribute parallel for simd is_device_ptr(g) simdlen(8)
- for(int i = 0; i < n; i++) {
+ for(
+ int i = 0; i < n; i++) {
a[i] = g[0];
}
}}}
// CHECK-LABEL: define {{.*}}void @{{.+}}gtid_test
void gtid_test() {
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
#pragma omp target teams distribute parallel for simd
// CHECK-LABEL: define {{.*}}i{{[0-9]+}} @main()
int main() {
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
// CHECK: call void [[OFFLOADING_FUN_0:@.+]](
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
// CHECK: call void [[OFFLOADING_FUN_1:@.+]](
+// CHECK: call void @__kmpc_push_target_tripcount(i64 -1, i64 100)
// CHECK: call i{{[0-9]+}} @__tgt_target_teams(
// CHECK: call void [[OFFLOADING_FUN_2:@.+]](
// CHECK: = call {{.*}}i{{.+}} @{{.+}}tmain
int a[100];
// CK1: define {{.*}}i32 @{{.+}}teams_argument_globali(
int teams_argument_global(int n){
int te = n / 128;
int th = 128;
// discard n_addr
// CK1: alloca i32,
// CK1: [[TE:%.+]] = alloca i32,
// CK1: [[TH:%.+]] = alloca i32,
+ // CK1: alloca i32,
+ // CK1: alloca i32,
+ // CK1: alloca i32,
// CK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
// CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
+ // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
// CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
// CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
// CK1: alloca i32,
// CK1: [[TE:%.+]] = alloca i32,
// CK1: [[TH:%.+]] = alloca i32,
+ // CK1: alloca i32,
+ // CK1: alloca i32,
+ // CK1: alloca i32,
// CK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
// CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
+ // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
// CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
// CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
// CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}})
// CK1: alloca i32,
// CK1: [[TE:%.+]] = alloca i32,
// CK1: [[TH:%.+]] = alloca i32,
+ // CK1: alloca i32,
+ // CK1: alloca i32,
+ // CK1: alloca i32,
// CK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
// CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
+ // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
// CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
// CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],
// CK1: alloca i32,
// CK1: [[TE:%.+]] = alloca i32,
// CK1: [[TH:%.+]] = alloca i32,
+ // CK1: alloca i32,
+ // CK1: alloca i32,
+ // CK1: alloca i32,
// CK1: [[TE_CAST:%.+]] = alloca i{{32|64}},
// CK1: [[TH_CAST:%.+]] = alloca i{{32|64}},
+ // CK1: call void @__kmpc_push_target_tripcount(i64 -1, i64 %{{.+}})
// CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]],
// CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]],