From 440f37958ebf3da5bd5338ec121fe21bd0990ac4 Mon Sep 17 00:00:00 2001 From: Arpith Chacko Jacob Date: Wed, 25 Jan 2017 00:57:16 +0000 Subject: [PATCH] [OpenMP] Support for the num_threads-clause on 'target parallel'. The num_threads-clause on the combined directive applies to the 'parallel' region of this construct. We modify the NumThreadsClause class to capture the clause expression within the 'target' region. The offload runtime call for 'target parallel' is changed to __tgt_target_teams() with 1 team and the number of threads set by this clause or a default if none. Reviewers: ABataev Differential Revision: https://reviews.llvm.org/D29082 git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@292997 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/AST/OpenMPClause.h | 21 +- include/clang/AST/RecursiveASTVisitor.h | 1 + lib/AST/OpenMPClause.cpp | 3 +- lib/AST/StmtProfile.cpp | 1 + lib/CodeGen/CGOpenMPRuntime.cpp | 123 +++++-- lib/Sema/SemaOpenMP.cpp | 85 ++++- lib/Serialization/ASTReaderStmt.cpp | 1 + lib/Serialization/ASTWriterStmt.cpp | 1 + test/OpenMP/target_parallel_codegen.cpp | 14 +- test/OpenMP/target_parallel_if_codegen.cpp | 12 +- .../target_parallel_num_threads_codegen.cpp | 344 ++++++++++++++++++ tools/libclang/CIndex.cpp | 1 + 12 files changed, 555 insertions(+), 52 deletions(-) create mode 100644 test/OpenMP/target_parallel_num_threads_codegen.cpp diff --git a/include/clang/AST/OpenMPClause.h b/include/clang/AST/OpenMPClause.h index f39aa50232..b7ee8e6a32 100644 --- a/include/clang/AST/OpenMPClause.h +++ b/include/clang/AST/OpenMPClause.h @@ -345,7 +345,7 @@ public: /// In this example directive '#pragma omp parallel' has simple 'num_threads' /// clause with number of threads '6'. /// -class OMPNumThreadsClause : public OMPClause { +class OMPNumThreadsClause : public OMPClause, public OMPClauseWithPreInit { friend class OMPClauseReader; /// \brief Location of '('. SourceLocation LParenLoc; @@ -360,20 +360,29 @@ public: /// \brief Build 'num_threads' clause with condition \a NumThreads. /// /// \param NumThreads Number of threads for the construct. + /// \param HelperNumThreads Helper Number of threads for the construct. + /// \param CaptureRegion Innermost OpenMP region where expressions in this + /// clause must be captured. /// \param StartLoc Starting location of the clause. /// \param LParenLoc Location of '('. /// \param EndLoc Ending location of the clause. /// - OMPNumThreadsClause(Expr *NumThreads, SourceLocation StartLoc, - SourceLocation LParenLoc, SourceLocation EndLoc) - : OMPClause(OMPC_num_threads, StartLoc, EndLoc), LParenLoc(LParenLoc), - NumThreads(NumThreads) {} + OMPNumThreadsClause(Expr *NumThreads, Stmt *HelperNumThreads, + OpenMPDirectiveKind CaptureRegion, + SourceLocation StartLoc, SourceLocation LParenLoc, + SourceLocation EndLoc) + : OMPClause(OMPC_num_threads, StartLoc, EndLoc), + OMPClauseWithPreInit(this), LParenLoc(LParenLoc), + NumThreads(NumThreads) { + setPreInitStmt(HelperNumThreads, CaptureRegion); + } /// \brief Build an empty clause. /// OMPNumThreadsClause() : OMPClause(OMPC_num_threads, SourceLocation(), SourceLocation()), - LParenLoc(SourceLocation()), NumThreads(nullptr) {} + OMPClauseWithPreInit(this), LParenLoc(SourceLocation()), + NumThreads(nullptr) {} /// \brief Sets the location of '('. void setLParenLoc(SourceLocation Loc) { LParenLoc = Loc; } diff --git a/include/clang/AST/RecursiveASTVisitor.h b/include/clang/AST/RecursiveASTVisitor.h index cb7db4f047..195fc66264 100644 --- a/include/clang/AST/RecursiveASTVisitor.h +++ b/include/clang/AST/RecursiveASTVisitor.h @@ -2725,6 +2725,7 @@ bool RecursiveASTVisitor::VisitOMPFinalClause(OMPFinalClause *C) { template bool RecursiveASTVisitor::VisitOMPNumThreadsClause(OMPNumThreadsClause *C) { + TRY_TO(VisitOMPClauseWithPreInit(C)); TRY_TO(TraverseStmt(C->getNumThreads())); return true; } diff --git a/lib/AST/OpenMPClause.cpp b/lib/AST/OpenMPClause.cpp index 52da57f633..55f5ca55d2 100644 --- a/lib/AST/OpenMPClause.cpp +++ b/lib/AST/OpenMPClause.cpp @@ -50,10 +50,11 @@ const OMPClauseWithPreInit *OMPClauseWithPreInit::get(const OMPClause *C) { return static_cast(C); case OMPC_if: return static_cast(C); + case OMPC_num_threads: + return static_cast(C); case OMPC_default: case OMPC_proc_bind: case OMPC_final: - case OMPC_num_threads: case OMPC_safelen: case OMPC_simdlen: case OMPC_collapse: diff --git a/lib/AST/StmtProfile.cpp b/lib/AST/StmtProfile.cpp index 2f3653f32d..fa086a3f33 100644 --- a/lib/AST/StmtProfile.cpp +++ b/lib/AST/StmtProfile.cpp @@ -294,6 +294,7 @@ void OMPClauseProfiler::VisitOMPFinalClause(const OMPFinalClause *C) { } void OMPClauseProfiler::VisitOMPNumThreadsClause(const OMPNumThreadsClause *C) { + VistOMPClauseWithPreInit(C); if (C->getNumThreads()) Profiler->VisitStmt(C->getNumThreads()); } diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 2f0648e05e..d9c68f9cce 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -4894,19 +4894,29 @@ static const Stmt *ignoreCompoundStmts(const Stmt *Body) { return Body; } -/// \brief Emit the num_teams clause of an enclosed teams directive at the -/// target region scope. If there is no teams directive associated with the -/// target directive, or if there is no num_teams clause associated with the -/// enclosed teams directive, return nullptr. +/// Emit the number of teams for a target directive. Inspect the num_teams +/// clause associated with a teams construct combined or closely nested +/// with the target directive. +/// +/// Emit a team of size one for directives such as 'target parallel' that +/// have no associated teams construct. +/// +/// Otherwise, return nullptr. static llvm::Value * -emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, - CodeGenFunction &CGF, - const OMPExecutableDirective &D) { +emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, + CodeGenFunction &CGF, + const OMPExecutableDirective &D) { assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " "teams directive expected to be " "emitted only for the host!"); + // If the target directive is combined with a parallel directive but not a + // teams directive, start one team. + if (isOpenMPParallelDirective(D.getDirectiveKind()) && + !isOpenMPTeamsDirective(D.getDirectiveKind())) + return CGF.Builder.getInt32(1); + // FIXME: For the moment we do not support combined directives with target and // teams, so we do not expect to get any num_teams clause in the provided // directive. Once we support that, this assertion can be replaced by the @@ -4943,19 +4953,56 @@ emitNumTeamsClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, return nullptr; } -/// \brief Emit the thread_limit clause of an enclosed teams directive at the -/// target region scope. If there is no teams directive associated with the -/// target directive, or if there is no thread_limit clause associated with the -/// enclosed teams directive, return nullptr. +/// Emit the number of threads for a target directive. Inspect the +/// thread_limit clause associated with a teams construct combined or closely +/// nested with the target directive. +/// +/// Emit the num_threads clause for directives such as 'target parallel' that +/// have no associated teams construct. +/// +/// Otherwise, return nullptr. static llvm::Value * -emitThreadLimitClauseForTargetDirective(CGOpenMPRuntime &OMPRuntime, - CodeGenFunction &CGF, - const OMPExecutableDirective &D) { +emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, + CodeGenFunction &CGF, + const OMPExecutableDirective &D) { assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " "teams directive expected to be " "emitted only for the host!"); + auto &Bld = CGF.Builder; + + // + // If the target directive is combined with a teams directive: + // Return the value in the thread_limit clause, if any. + // + // If the target directive is combined with a parallel directive: + // Return the value in the num_threads clause, if any. + // + // If both clauses are set, select the minimum of the two. + // + // If neither teams or parallel combined directives set the number of threads + // in a team, return 0 to denote the runtime default. + // + // If this is not a teams directive return nullptr. + + if (isOpenMPParallelDirective(D.getDirectiveKind())) { + llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0); + llvm::Value *NumThreadsVal = nullptr; + + if (const auto *NumThreadsClause = + D.getSingleClause()) { + CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); + llvm::Value *NumThreads = + CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), + /*IgnoreResultAssign*/ true); + NumThreadsVal = + Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true); + } + + return NumThreadsVal ? NumThreadsVal : DefaultThreadLimitVal; + } + // FIXME: For the moment we do not support combined directives with target and // teams, so we do not expect to get any thread_limit clause in the provided // directive. Once we support that, this assertion can be replaced by the @@ -6041,24 +6088,50 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // Return value of the runtime offloading call. llvm::Value *Return; - auto *NumTeams = emitNumTeamsClauseForTargetDirective(RT, CGF, D); - auto *ThreadLimit = emitThreadLimitClauseForTargetDirective(RT, CGF, D); + auto *NumTeams = emitNumTeamsForTargetDirective(RT, CGF, D); + auto *NumThreads = emitNumThreadsForTargetDirective(RT, CGF, D); - // If we have NumTeams defined this means that we have an enclosed teams - // region. Therefore we also expect to have ThreadLimit defined. These two - // values should be defined in the presence of a teams directive, regardless - // of having any clauses associated. If the user is using teams but no - // clauses, these two values will be the default that should be passed to - // the runtime library - a 32-bit integer with the value zero. + // The target region is an outlined function launched by the runtime + // via calls __tgt_target() or __tgt_target_teams(). + // + // __tgt_target() launches a target region with one team and one thread, + // executing a serial region. This master thread may in turn launch + // more threads within its team upon encountering a parallel region, + // however, no additional teams can be launched on the device. + // + // __tgt_target_teams() launches a target region with one or more teams, + // each with one or more threads. This call is required for target + // constructs such as: + // 'target teams' + // 'target' / 'teams' + // 'target teams distribute parallel for' + // 'target parallel' + // and so on. + // + // Note that on the host and CPU targets, the runtime implementation of + // these calls simply call the outlined function without forking threads. + // The outlined functions themselves have runtime calls to + // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by + // the compiler in emitTeamsCall() and emitParallelCall(). + // + // In contrast, on the NVPTX target, the implementation of + // __tgt_target_teams() launches a GPU kernel with the requested number + // of teams and threads so no additional calls to the runtime are required. if (NumTeams) { - assert(ThreadLimit && "Thread limit expression should be available along " - "with number of teams."); + // If we have NumTeams defined this means that we have an enclosed teams + // region. Therefore we also expect to have NumThreads defined. These two + // values should be defined in the presence of a teams directive, + // regardless of having any clauses associated. If the user is using teams + // but no clauses, these two values will be the default that should be + // passed to the runtime library - a 32-bit integer with the value zero. + assert(NumThreads && "Thread limit expression should be available along " + "with number of teams."); llvm::Value *OffloadingArgs[] = { DeviceID, OutlinedFnID, PointerNum, Info.BasePointersArray, Info.PointersArray, Info.SizesArray, Info.MapTypesArray, NumTeams, - ThreadLimit}; + NumThreads}; Return = CGF.EmitRuntimeCall( RT.createRuntimeFunction(OMPRTL__tgt_target_teams), OffloadingArgs); } else { diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp index c902118f65..9ef39b6889 100644 --- a/lib/Sema/SemaOpenMP.cpp +++ b/lib/Sema/SemaOpenMP.cpp @@ -6635,10 +6635,9 @@ OMPClause *Sema::ActOnOpenMPSingleExprClause(OpenMPClauseKind Kind, Expr *Expr, // the region in which to capture expressions associated with a clause. // A return value of OMPD_unknown signifies that the expression should not // be captured. -static OpenMPDirectiveKind -getOpenMPCaptureRegionForClause(OpenMPDirectiveKind DKind, - OpenMPClauseKind CKind, - OpenMPDirectiveKind NameModifier) { +static OpenMPDirectiveKind getOpenMPCaptureRegionForClause( + OpenMPDirectiveKind DKind, OpenMPClauseKind CKind, + OpenMPDirectiveKind NameModifier = OMPD_unknown) { OpenMPDirectiveKind CaptureRegion = OMPD_unknown; switch (CKind) { @@ -6708,6 +6707,69 @@ getOpenMPCaptureRegionForClause(OpenMPDirectiveKind DKind, llvm_unreachable("Unknown OpenMP directive"); } break; + case OMPC_num_threads: + switch (DKind) { + case OMPD_target_parallel: + CaptureRegion = OMPD_target; + break; + case OMPD_cancel: + case OMPD_parallel: + case OMPD_parallel_sections: + case OMPD_parallel_for: + case OMPD_parallel_for_simd: + case OMPD_target: + case OMPD_target_simd: + case OMPD_target_parallel_for: + case OMPD_target_parallel_for_simd: + case OMPD_target_teams: + case OMPD_target_teams_distribute: + case OMPD_target_teams_distribute_simd: + case OMPD_target_teams_distribute_parallel_for: + case OMPD_target_teams_distribute_parallel_for_simd: + case OMPD_teams_distribute_parallel_for: + case OMPD_teams_distribute_parallel_for_simd: + case OMPD_distribute_parallel_for: + case OMPD_distribute_parallel_for_simd: + case OMPD_task: + case OMPD_taskloop: + case OMPD_taskloop_simd: + case OMPD_target_data: + case OMPD_target_enter_data: + case OMPD_target_exit_data: + case OMPD_target_update: + // Do not capture num_threads-clause expressions. + break; + case OMPD_threadprivate: + case OMPD_taskyield: + case OMPD_barrier: + case OMPD_taskwait: + case OMPD_cancellation_point: + case OMPD_flush: + case OMPD_declare_reduction: + case OMPD_declare_simd: + case OMPD_declare_target: + case OMPD_end_declare_target: + case OMPD_teams: + case OMPD_simd: + case OMPD_for: + case OMPD_for_simd: + case OMPD_sections: + case OMPD_section: + case OMPD_single: + case OMPD_master: + case OMPD_critical: + case OMPD_taskgroup: + case OMPD_distribute: + case OMPD_ordered: + case OMPD_atomic: + case OMPD_distribute_simd: + case OMPD_teams_distribute: + case OMPD_teams_distribute_simd: + llvm_unreachable("Unexpected OpenMP directive with num_threads-clause"); + case OMPD_unknown: + llvm_unreachable("Unknown OpenMP directive"); + } + break; case OMPC_schedule: case OMPC_dist_schedule: case OMPC_firstprivate: @@ -6717,7 +6779,6 @@ getOpenMPCaptureRegionForClause(OpenMPDirectiveKind DKind, case OMPC_default: case OMPC_proc_bind: case OMPC_final: - case OMPC_num_threads: case OMPC_safelen: case OMPC_simdlen: case OMPC_collapse: @@ -6887,6 +6948,8 @@ OMPClause *Sema::ActOnOpenMPNumThreadsClause(Expr *NumThreads, SourceLocation LParenLoc, SourceLocation EndLoc) { Expr *ValExpr = NumThreads; + Stmt *HelperValStmt = nullptr; + OpenMPDirectiveKind CaptureRegion = OMPD_unknown; // OpenMP [2.5, Restrictions] // The num_threads expression must evaluate to a positive integer value. @@ -6894,8 +6957,16 @@ OMPClause *Sema::ActOnOpenMPNumThreadsClause(Expr *NumThreads, /*StrictlyPositive=*/true)) return nullptr; - return new (Context) - OMPNumThreadsClause(ValExpr, StartLoc, LParenLoc, EndLoc); + OpenMPDirectiveKind DKind = DSAStack->getCurrentDirective(); + CaptureRegion = getOpenMPCaptureRegionForClause(DKind, OMPC_num_threads); + if (CaptureRegion != OMPD_unknown) { + llvm::MapVector Captures; + ValExpr = tryBuildCapture(*this, ValExpr, Captures).get(); + HelperValStmt = buildPreInits(Context, Captures); + } + + return new (Context) OMPNumThreadsClause( + ValExpr, HelperValStmt, CaptureRegion, StartLoc, LParenLoc, EndLoc); } ExprResult Sema::VerifyPositiveIntegerConstantInClause(Expr *E, diff --git a/lib/Serialization/ASTReaderStmt.cpp b/lib/Serialization/ASTReaderStmt.cpp index ab158f7241..55eca5058e 100644 --- a/lib/Serialization/ASTReaderStmt.cpp +++ b/lib/Serialization/ASTReaderStmt.cpp @@ -1952,6 +1952,7 @@ void OMPClauseReader::VisitOMPFinalClause(OMPFinalClause *C) { } void OMPClauseReader::VisitOMPNumThreadsClause(OMPNumThreadsClause *C) { + VisitOMPClauseWithPreInit(C); C->setNumThreads(Reader->Record.readSubExpr()); C->setLParenLoc(Reader->ReadSourceLocation()); } diff --git a/lib/Serialization/ASTWriterStmt.cpp b/lib/Serialization/ASTWriterStmt.cpp index 4e8660e028..ea6f579374 100644 --- a/lib/Serialization/ASTWriterStmt.cpp +++ b/lib/Serialization/ASTWriterStmt.cpp @@ -1818,6 +1818,7 @@ void OMPClauseWriter::VisitOMPFinalClause(OMPFinalClause *C) { } void OMPClauseWriter::VisitOMPNumThreadsClause(OMPNumThreadsClause *C) { + VisitOMPClauseWithPreInit(C); Record.AddStmt(C->getNumThreads()); Record.AddSourceLocation(C->getLParenLoc()); } diff --git a/test/OpenMP/target_parallel_codegen.cpp b/test/OpenMP/target_parallel_codegen.cpp index 028e6f8cfc..c7acb27cab 100644 --- a/test/OpenMP/target_parallel_codegen.cpp +++ b/test/OpenMP/target_parallel_codegen.cpp @@ -91,7 +91,7 @@ int foo(int n) { double cn[5][n]; TT d; - // CHECK: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i32* null) + // CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i32* null, i32 1, i32 0) // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align 4 // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align 4 // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 @@ -114,7 +114,7 @@ int foo(int n) { a += 1; } - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT2]], i32 0, i32 0)) + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i32* getelementptr inbounds ([1 x i32], [1 x i32]* [[MAPT2]], i32 0, i32 0), i32 1, i32 0) // CHECK-DAG: [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]] @@ -140,7 +140,7 @@ int foo(int n) { // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10 // CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CHECK: [[IFTHEN]] - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i32* getelementptr inbounds ([2 x i32], [2 x i32]* [[MAPT3]], i32 0, i32 0)) + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i32* getelementptr inbounds ([2 x i32], [2 x i32]* [[MAPT3]], i32 0, i32 0), i32 1, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0 @@ -195,7 +195,7 @@ int foo(int n) { // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 20 // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] // CHECK: [[TRY]] - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i32* getelementptr inbounds ([9 x i32], [9 x i32]* [[MAPT4]], i32 0, i32 0)) + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i32* getelementptr inbounds ([9 x i32], [9 x i32]* [[MAPT4]], i32 0, i32 0), i32 1, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[SR]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S:%[^,]+]], i32 0, i32 0 @@ -529,7 +529,7 @@ int bar(int n){ // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 60 // CHECK: br i1 [[IF]], label %[[TRY:[^,]+]], label %[[FAIL:[^,]+]] // CHECK: [[TRY]] -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i32* getelementptr inbounds ([5 x i32], [5 x i32]* [[MAPT7]], i32 0, i32 0)) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i32* getelementptr inbounds ([5 x i32], [5 x i32]* [[MAPT7]], i32 0, i32 0), i32 1, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0 // CHECK-DAG: [[SR]] = getelementptr inbounds [5 x i[[SZ]]], [5 x i[[SZ]]]* [[S:%.+]], i32 0, i32 0 @@ -592,7 +592,7 @@ int bar(int n){ // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50 // CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CHECK: [[IFTHEN]] -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i32* getelementptr inbounds ([4 x i32], [4 x i32]* [[MAPT6]], i32 0, i32 0)) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i32* getelementptr inbounds ([4 x i32], [4 x i32]* [[MAPT6]], i32 0, i32 0), i32 1, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P:%.+]], i32 0, i32 0 @@ -644,7 +644,7 @@ int bar(int n){ // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40 // CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CHECK: [[IFTHEN]] -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i32* getelementptr inbounds ([3 x i32], [3 x i32]* [[MAPT5]], i32 0, i32 0)) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i32* getelementptr inbounds ([3 x i32], [3 x i32]* [[MAPT5]], i32 0, i32 0), i32 1, i32 0) // CHECK-DAG: [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0 diff --git a/test/OpenMP/target_parallel_if_codegen.cpp b/test/OpenMP/target_parallel_if_codegen.cpp index 523cad0b20..02c69b95e1 100644 --- a/test/OpenMP/target_parallel_if_codegen.cpp +++ b/test/OpenMP/target_parallel_if_codegen.cpp @@ -145,7 +145,7 @@ int bar(int n){ // CHECK: store i8 [[FB]], i8* [[CONV]], align // CHECK: [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 3, +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 0) // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align // CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 @@ -173,7 +173,7 @@ int bar(int n){ // CHECK: br i1 [[CMP]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]] // // CHECK: [[IF_THEN]] -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 2, +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, {{.*}}, i32 1, i32 0) // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align // CHECK: br label {{%?}}[[END:.+]] // @@ -215,7 +215,7 @@ int bar(int n){ // CHECK: br i1 [[TB]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]] // // CHECK: [[IF_THEN]] -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1, +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 0) // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align // CHECK: br label {{%?}}[[END:.+]] // @@ -241,7 +241,7 @@ int bar(int n){ // CHECK: br i1 [[CMP]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]] // // CHECK: [[IF_THEN]] -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 0, +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 1, i32 0) // CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align // CHECK: br label {{%?}}[[END:.+]] // @@ -267,7 +267,7 @@ int bar(int n){ // // CHECK: define {{.*}}[[FTEMPLATE]] // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 1, +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 0) // CHECK-NEXT: store i32 [[RET]], i32* [[RHV:%.+]], align // CHECK-NEXT: [[RET2:%.+]] = load i32, i32* [[RHV]], align // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 @@ -281,7 +281,7 @@ int bar(int n){ // // // -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i32 -1, i8* @{{[^,]+}}, i32 2, +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 2, {{.*}}, i32 1, i32 0) // CHECK-NEXT: store i32 [[RET]], i32* [[RHV:%.+]], align // CHECK-NEXT: [[RET2:%.+]] = load i32, i32* [[RHV]], align // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 diff --git a/test/OpenMP/target_parallel_num_threads_codegen.cpp b/test/OpenMP/target_parallel_num_threads_codegen.cpp new file mode 100644 index 0000000000..de6a13d087 --- /dev/null +++ b/test/OpenMP/target_parallel_num_threads_codegen.cpp @@ -0,0 +1,344 @@ +// Test host codegen. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CHECK --check-prefix CHECK-32 + +// Test target codegen - host bc file has to be created first. +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=powerpc64le-ibm-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm-bc %s -o %t-x86-host.bc +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -std=c++11 -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -emit-pch -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o %t %s +// RUN: %clang_cc1 -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=i386-pc-linux-gnu -std=c++11 -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 + +// expected-no-diagnostics +#ifndef HEADER +#define HEADER + +// CHECK-DAG: %ident_t = type { i32, i32, i32, i32, i8* } +// CHECK-DAG: [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00" +// CHECK-DAG: [[DEF_LOC:@.+]] = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* [[STR]], i32 0, i32 0) } + +// CHECK-DAG: [[S1:%.+]] = type { double } +// CHECK-DAG: [[ENTTY:%.+]] = type { i8*, i8*, i[[SZ:32|64]], i32, i32 } +// CHECK-DAG: [[DEVTY:%.+]] = type { i8*, i8*, [[ENTTY]]*, [[ENTTY]]* } +// CHECK-DAG: [[DSCTY:%.+]] = type { i32, [[DEVTY]]*, [[ENTTY]]*, [[ENTTY]]* } + +// TCHECK: [[ENTTY:%.+]] = type { i8*, i8*, i{{32|64}}, i32, i32 } + +// We have 6 target regions + +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 +// CHECK-DAG: @{{.*}} = private constant i8 0 + +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] +// TCHECK: @{{.+}} = constant [[ENTTY]] + +// Check if offloading descriptor is created. +// CHECK: [[ENTBEGIN:@.+]] = external constant [[ENTTY]] +// CHECK: [[ENTEND:@.+]] = external constant [[ENTTY]] +// CHECK: [[DEVBEGIN:@.+]] = external constant i8 +// CHECK: [[DEVEND:@.+]] = external constant i8 +// CHECK: [[IMAGES:@.+]] = internal unnamed_addr constant [1 x [[DEVTY]]] [{{.+}} { i8* [[DEVBEGIN]], i8* [[DEVEND]], [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] }] +// CHECK: [[DESC:@.+]] = internal constant [[DSCTY]] { i32 1, [[DEVTY]]* getelementptr inbounds ([1 x [[DEVTY]]], [1 x [[DEVTY]]]* [[IMAGES]], i32 0, i32 0), [[ENTTY]]* [[ENTBEGIN]], [[ENTTY]]* [[ENTEND]] } + +// Check target registration is registered as a Ctor. +// CHECK: appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 0, void ()* bitcast (void (i8*)* [[REGFN:@.+]] to void ()*), i8* null }] + + +template +tx ftemplate(int n) { + tx a = 0; + + #pragma omp target parallel num_threads(tx(20)) + { + } + + short b = 1; + #pragma omp target parallel num_threads(b) + { + a += b; + } + + return a; +} + +static +int fstatic(int n) { + + #pragma omp target parallel num_threads(n) + { + } + + #pragma omp target parallel num_threads(32+n) + { + } + + return n+1; +} + +struct S1 { + double a; + + int r1(int n){ + int b = 1; + + #pragma omp target parallel num_threads(n-b) + { + this->a = (double)b + 1.5; + } + + #pragma omp target parallel num_threads(1024) + { + this->a = 2.5; + } + + return (int)a; + } +}; + +// CHECK: define {{.*}}@{{.*}}bar{{.*}} +int bar(int n){ + int a = 0; + + S1 S; + // CHECK: call {{.*}}i32 [[FS1:@.+]]([[S1]]* {{.*}}, i32 {{.*}}) + a += S.r1(n); + + // CHECK: call {{.*}}i32 [[FSTATIC:@.+]](i32 {{.*}}) + a += fstatic(n); + + // CHECK: call {{.*}}i32 [[FTEMPLATE:@.+]](i32 {{.*}}) + a += ftemplate(n); + + return a; +} + + + +// +// CHECK: define {{.*}}[[FS1]]([[S1]]* {{%.+}}, i32 {{[^%]*}}[[PARM:%.+]]) +// +// CHECK-DAG: store i32 [[PARM]], i32* [[N_ADDR:%.+]], align +// CHECK: store i32 1, i32* [[B:%.+]], align +// CHECK: [[NV:%.+]] = load i32, i32* [[N_ADDR]], align +// CHECK: [[BV:%.+]] = load i32, i32* [[B]], align +// CHECK: [[SUB:%.+]] = sub nsw i32 [[NV]], [[BV]] +// CHECK: store i32 [[SUB]], i32* [[CAPE_ADDR:%.+]], align +// CHECK: [[CEV:%.+]] = load i32, i32* [[CAPE_ADDR]], align +// CHECK-64: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPEC_ADDR:%.+]] to i32* +// CHECK-64: store i32 [[CEV]], i32* [[CONV]], align +// CHECK-32: store i32 [[CEV]], i32* [[CAPEC_ADDR:%.+]], align +// CHECK: [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align +// CHECK: [[THREADS:%.+]] = load i32, i32* [[CAPE_ADDR]], align +// +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 [[THREADS]]) +// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align +// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] +// +// CHECK: [[FAIL]] +// CHECK: call void [[HVT1:@.+]]([[S1]]* {{%.+}}, i[[SZ]] {{%.+}}, i[[SZ]] [[ARG]]) +// CHECK: br label {{%?}}[[END]] +// CHECK: [[END]] +// +// +// +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.+}}, i32 1, i32 1024) +// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align +// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] +// +// CHECK: [[FAIL]] +// CHECK: call void [[HVT2:@.+]]([[S1]]* {{[^,]+}}) +// CHECK: br label {{%?}}[[END]] +// CHECK: [[END]] +// + + + + + + +// +// CHECK: define {{.*}}[[FSTATIC]](i32 {{[^%]*}}[[PARM:%.+]]) +// +// CHECK-DAG: store i32 [[PARM]], i32* [[N_ADDR:%.+]], align +// CHECK: [[NV:%.+]] = load i32, i32* [[N_ADDR]], align +// CHECK: store i32 [[NV]], i32* [[CAPE_ADDR:%.+]], align +// CHECK: [[CEV:%.+]] = load i32, i32* [[CAPE_ADDR]], align +// CHECK-64: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPEC_ADDR:%.+]] to i32* +// CHECK-64: store i32 [[CEV]], i32* [[CONV]], align +// CHECK-32: store i32 [[CEV]], i32* [[CAPEC_ADDR:%.+]], align +// CHECK: [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align +// CHECK: [[THREADS:%.+]] = load i32, i32* [[CAPE_ADDR]], align +// +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 [[THREADS]]) +// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align +// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] +// +// CHECK: [[FAIL]] +// CHECK: call void [[HVT3:@.+]](i[[SZ]] [[ARG]]) +// CHECK: br label {{%?}}[[END]] +// CHECK: [[END]] +// +// +// +// CHECK: [[NV:%.+]] = load i32, i32* [[N_ADDR]], align +// CHECK: [[ADD:%.+]] = add nsw i32 32, [[NV]] +// CHECK: store i32 [[ADD]], i32* [[CAPE_ADDR:%.+]], align +// CHECK: [[CEV:%.+]] = load i32, i32* [[CAPE_ADDR]], align +// CHECK-64: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPEC_ADDR:%.+]] to i32* +// CHECK-64: store i32 [[CEV]], i32* [[CONV]], align +// CHECK-32: store i32 [[CEV]], i32* [[CAPEC_ADDR:%.+]], align +// CHECK: [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align +// CHECK: [[THREADS:%.+]] = load i32, i32* [[CAPE_ADDR]], align +// +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 1, {{.*}}, i32 1, i32 [[THREADS]]) +// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align +// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] +// +// CHECK: [[FAIL]] +// CHECK: call void [[HVT4:@.+]](i[[SZ]] [[ARG]]) +// CHECK: br label {{%?}}[[END]] +// CHECK: [[END]] +// + + + + + + +// +// CHECK: define {{.*}}[[FTEMPLATE]] +// +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 0, {{.*}}, i32 1, i32 20) +// CHECK-NEXT: store i32 [[RET]], i32* [[RHV:%.+]], align +// CHECK-NEXT: [[RET2:%.+]] = load i32, i32* [[RHV]], align +// CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] +// +// CHECK: [[FAIL]] +// CHECK: call void [[HVT5:@.+]]() +// CHECK: br label {{%?}}[[END]] +// +// CHECK: [[END]] +// +// +// +// CHECK: store i16 1, i16* [[B:%.+]], align +// CHECK: [[BV:%.+]] = load i16, i16* [[B]], align +// CHECK: store i16 [[BV]], i16* [[CAPE_ADDR:%.+]], align +// CHECK: [[CEV:%.+]] = load i16, i16* [[CAPE_ADDR]], align +// CHECK: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPEC_ADDR:%.+]] to i16* +// CHECK: store i16 [[CEV]], i16* [[CONV]], align +// CHECK: [[ARG:%.+]] = load i[[SZ]], i[[SZ]]* [[CAPEC_ADDR]], align +// CHECK: [[T:%.+]] = load i16, i16* [[CAPE_ADDR]], align +// CHECK: [[THREADS:%.+]] = sext i16 [[T]] to i32 +// +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i32 -1, i8* @{{[^,]+}}, i32 3, {{.*}}, i32 1, i32 [[THREADS]]) +// CHECK: store i32 [[RET]], i32* [[RHV:%.+]], align +// CHECK: [[RET2:%.+]] = load i32, i32* [[RHV]], align +// CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET2]], 0 +// CHECK: br i1 [[ERROR]], label %[[FAIL:.+]], label %[[END:[^,]+]] +// +// CHECK: [[FAIL]] +// CHECK: call void [[HVT6:@.+]](i[[SZ]] {{%.+}}, i[[SZ]] {{%.+}}, i[[SZ]] [[ARG]]) +// CHECK: br label {{%?}}[[END]] +// CHECK: [[END]] +// + + + + + + +// Check that the offloading functions are emitted and that the parallel function +// is appropriately guarded. + +// CHECK: define internal void [[HVT1]]([[S1]]* {{%.+}}, i[[SZ]] [[PARM1:%.+]], i[[SZ]] [[PARM2:%.+]]) +// CHECK-DAG: store i[[SZ]] [[PARM2]], i[[SZ]]* [[CAPE_ADDR:%.+]], align +// CHECK-64: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i32* +// CHECK-64: [[NT:%.+]] = load i32, i32* [[CONV]], align +// CHECK-32: [[NT:%.+]] = load i32, i32* [[CAPE_ADDR]], align +// CHECK: call void @__kmpc_push_num_threads(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 [[NT]]) +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 2, +// +// + + +// CHECK: define internal void [[HVT2]]([[S1]]* {{%.+}}) +// CHECK: call void @__kmpc_push_num_threads(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 1024) +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 1, +// +// + + + + + + + + +// CHECK: define internal void [[HVT3]](i[[SZ]] [[PARM:%.+]]) +// CHECK-DAG: store i[[SZ]] [[PARM]], i[[SZ]]* [[CAPE_ADDR:%.+]], align +// CHECK-64: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i32* +// CHECK-64: [[NT:%.+]] = load i32, i32* [[CONV]], align +// CHECK-32: [[NT:%.+]] = load i32, i32* [[CAPE_ADDR]], align +// CHECK: call void @__kmpc_push_num_threads(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 [[NT]]) +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 0, +// +// +// CHECK: define internal void [[HVT4]](i[[SZ]] [[PARM:%.+]]) +// CHECK-DAG: store i[[SZ]] [[PARM]], i[[SZ]]* [[CAPE_ADDR:%.+]], align +// CHECK-64: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i32* +// CHECK-64: [[NT:%.+]] = load i32, i32* [[CONV]], align +// CHECK-32: [[NT:%.+]] = load i32, i32* [[CAPE_ADDR]], align +// CHECK: call void @__kmpc_push_num_threads(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 [[NT]]) +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 0, +// +// + + + + + +// CHECK: define internal void [[HVT5]]( +// CHECK: call void @__kmpc_push_num_threads(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 20) +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 0, +// +// + + +// CHECK: define internal void [[HVT6]](i[[SZ]] [[PARM1:%.+]], i[[SZ]] [[PARM2:%.+]], i[[SZ]] [[PARM3:%.+]]) +// CHECK-DAG: store i[[SZ]] [[PARM3]], i[[SZ]]* [[CAPE_ADDR:%.+]], align +// CHECK: [[CONV:%.+]] = bitcast i[[SZ]]* [[CAPE_ADDR]] to i16* +// CHECK: [[T:%.+]] = load i16, i16* [[CONV]], align +// CHECK: [[NT:%.+]] = sext i16 [[T]] to i32 +// CHECK: call void @__kmpc_push_num_threads(%ident_t* {{[^,]+}}, i32 {{[^,]+}}, i32 [[NT]]) +// CHECK: call {{.*}}void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* [[DEF_LOC]], i32 2, +// +// + + + +#endif diff --git a/tools/libclang/CIndex.cpp b/tools/libclang/CIndex.cpp index 48a0a332cf..22d144c4ab 100644 --- a/tools/libclang/CIndex.cpp +++ b/tools/libclang/CIndex.cpp @@ -2113,6 +2113,7 @@ void OMPClauseEnqueue::VisitOMPFinalClause(const OMPFinalClause *C) { } void OMPClauseEnqueue::VisitOMPNumThreadsClause(const OMPNumThreadsClause *C) { + VisitOMPClauseWithPreInit(C); Visitor->AddStmt(C->getNumThreads()); } -- 2.40.0