From: Alexey Bataev Date: Wed, 10 Apr 2019 19:11:33 +0000 (+0000) Subject: [OPENMP]Improve detection of number of teams, threads in target X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=dd7821f8092fffaddffb87250e4cdb305f8f9d21;p=clang [OPENMP]Improve detection of number of teams, threads in target regions. Added more complex analysis for number of teams and number of threads in the target regions, also merged related common code between CGOpenMPRuntime and CGOpenMPRuntimeNVPTX classes. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@358126 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 03d7b51b4e..4f631338c1 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -6475,12 +6475,59 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper( OffloadEntriesInfoManagerTy::OMPTargetRegionEntryTargetRegion); } -/// discard all CompoundStmts intervening between two constructs -static const Stmt *ignoreCompoundStmts(const Stmt *Body) { - while (const auto *CS = dyn_cast_or_null(Body)) - Body = CS->body_front(); +/// Checks if the expression is constant or does not have non-trivial function +/// calls. +static bool isTrivial(ASTContext &Ctx, const Expr * E) { + // We can skip constant expressions. + // We can skip expressions with trivial calls or simple expressions. + return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || + !E->hasNonTrivialCall(Ctx)) && + !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); +} - return Body; +const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx, + const Stmt *Body) { + const Stmt *Child = Body->IgnoreContainers(); + while (const auto *C = dyn_cast_or_null(Child)) { + Child = nullptr; + for (const Stmt *S : C->body()) { + if (const auto *E = dyn_cast(S)) { + if (isTrivial(Ctx, E)) + continue; + } + // Some of the statements can be ignored. + if (isa(S) || isa(S) || isa(S) || + isa(S) || isa(S)) + continue; + // Analyze declarations. + if (const auto *DS = dyn_cast(S)) { + if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { + if (isa(D) || isa(D) || + isa(D) || isa(D) || + isa(D) || isa(D) || + isa(D) || + isa(D) || + isa(D) || isa(D)) + return true; + const auto *VD = dyn_cast(D); + if (!VD) + return false; + return VD->isConstexpr() || + ((VD->getType().isTrivialType(Ctx) || + VD->getType()->isReferenceType()) && + (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); + })) + continue; + } + // Found multiple children - cannot get the one child only. + if (Child) + return nullptr; + Child = S; + } + if (Child) + Child = Child->IgnoreContainers(); + } + return Child; } /// Emit the number of teams for a target directive. Inspect the num_teams @@ -6492,63 +6539,163 @@ static const Stmt *ignoreCompoundStmts(const Stmt *Body) { /// /// Otherwise, return nullptr. static llvm::Value * -emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, - CodeGenFunction &CGF, +emitNumTeamsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D) { - assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " - "teams directive expected to be " - "emitted only for the host!"); - + assert(!CGF.getLangOpts().OpenMPIsDevice && + "Clauses associated with the teams directive expected to be emitted " + "only for the host!"); + OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); + assert(isOpenMPTargetExecutionDirective(DirectiveKind) && + "Expected target-based executable directive."); CGBuilderTy &Bld = CGF.Builder; - - // If the target directive is combined with a teams directive: - // Return the value in the num_teams clause, if any. - // Otherwise, return 0 to denote the runtime default. - if (isOpenMPTeamsDirective(D.getDirectiveKind())) { - if (const auto *NumTeamsClause = D.getSingleClause()) { + switch (DirectiveKind) { + case OMPD_target: { + const auto *CS = D.getInnermostCapturedStmt(); + const auto *Body = + CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); + const Stmt *ChildStmt = + CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body); + if (const auto *NestedDir = + dyn_cast_or_null(ChildStmt)) { + if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) { + if (NestedDir->hasClausesOfKind()) { + CGOpenMPInnerExprInfo CGInfo(CGF, *CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + const Expr *NumTeams = + NestedDir->getSingleClause()->getNumTeams(); + llvm::Value *NumTeamsVal = + CGF.EmitScalarExpr(NumTeams, + /*IgnoreResultAssign*/ true); + return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, + /*IsSigned=*/true); + } + return Bld.getInt32(0); + } + if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) || + isOpenMPSimdDirective(NestedDir->getDirectiveKind())) + return Bld.getInt32(1); + return Bld.getInt32(0); + } + return nullptr; + } + case OMPD_target_teams: + case OMPD_target_teams_distribute: + case OMPD_target_teams_distribute_simd: + case OMPD_target_teams_distribute_parallel_for: + case OMPD_target_teams_distribute_parallel_for_simd: { + if (D.hasClausesOfKind()) { CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF); - llvm::Value *NumTeams = CGF.EmitScalarExpr(NumTeamsClause->getNumTeams(), - /*IgnoreResultAssign*/ true); - return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, + const Expr *NumTeams = + D.getSingleClause()->getNumTeams(); + llvm::Value *NumTeamsVal = + CGF.EmitScalarExpr(NumTeams, + /*IgnoreResultAssign*/ true); + return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty, /*IsSigned=*/true); } - - // The default value is 0. return Bld.getInt32(0); } - - // If the target directive is combined with a parallel directive but not a - // teams directive, start one team. - if (isOpenMPParallelDirective(D.getDirectiveKind())) + case OMPD_target_parallel: + case OMPD_target_parallel_for: + case OMPD_target_parallel_for_simd: + case OMPD_target_simd: return Bld.getInt32(1); + case OMPD_parallel: + case OMPD_for: + case OMPD_parallel_for: + case OMPD_parallel_sections: + case OMPD_for_simd: + case OMPD_parallel_for_simd: + case OMPD_cancel: + case OMPD_cancellation_point: + case OMPD_ordered: + case OMPD_threadprivate: + case OMPD_allocate: + case OMPD_task: + case OMPD_simd: + case OMPD_sections: + case OMPD_section: + case OMPD_single: + case OMPD_master: + case OMPD_critical: + case OMPD_taskyield: + case OMPD_barrier: + case OMPD_taskwait: + case OMPD_taskgroup: + case OMPD_atomic: + case OMPD_flush: + case OMPD_teams: + case OMPD_target_data: + case OMPD_target_exit_data: + case OMPD_target_enter_data: + case OMPD_distribute: + case OMPD_distribute_simd: + case OMPD_distribute_parallel_for: + case OMPD_distribute_parallel_for_simd: + case OMPD_teams_distribute: + case OMPD_teams_distribute_simd: + case OMPD_teams_distribute_parallel_for: + case OMPD_teams_distribute_parallel_for_simd: + case OMPD_target_update: + case OMPD_declare_simd: + case OMPD_declare_target: + case OMPD_end_declare_target: + case OMPD_declare_reduction: + case OMPD_declare_mapper: + case OMPD_taskloop: + case OMPD_taskloop_simd: + case OMPD_requires: + case OMPD_unknown: + break; + } + llvm_unreachable("Unexpected directive kind."); +} - // If the current target region has a teams region enclosed, we need to get - // the number of teams to pass to the runtime function call. This is done - // by generating the expression in a inlined region. This is required because - // the expression is captured in the enclosing target environment when the - // teams directive is not combined with target. - - const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); - - if (const auto *TeamsDir = dyn_cast_or_null( - ignoreCompoundStmts(CS.getCapturedStmt()))) { - if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) { - if (const auto *NTE = TeamsDir->getSingleClause()) { - CGOpenMPInnerExprInfo CGInfo(CGF, CS); +static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS, + llvm::Value *DefaultThreadLimitVal) { + const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( + CGF.getContext(), CS->getCapturedStmt()); + if (const auto *Dir = dyn_cast_or_null(Child)) { + if (isOpenMPParallelDirective(Dir->getDirectiveKind())) { + if (Dir->hasClausesOfKind()) { + CGOpenMPInnerExprInfo CGInfo(CGF, *CS); CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - llvm::Value *NumTeams = CGF.EmitScalarExpr(NTE->getNumTeams()); - return Bld.CreateIntCast(NumTeams, CGF.Int32Ty, - /*IsSigned=*/true); + const auto *NumThreadsClause = + Dir->getSingleClause(); + CodeGenFunction::LexicalScope Scope( + CGF, NumThreadsClause->getNumThreads()->getSourceRange()); + if (const auto *PreInit = + cast_or_null(NumThreadsClause->getPreInitStmt())) { + for (const auto *I : PreInit->decls()) { + if (!I->hasAttr()) { + CGF.EmitVarDecl(cast(*I)); + } else { + CodeGenFunction::AutoVarEmission Emission = + CGF.EmitAutoVarAlloca(cast(*I)); + CGF.EmitAutoVarCleanups(Emission); + } + } + } + llvm::Value *NumThreads = + CGF.EmitScalarExpr(NumThreadsClause->getNumThreads()); + NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, + /*IsSigned=*/true); + return DefaultThreadLimitVal + ? CGF.Builder.CreateSelect( + CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, + NumThreads), + DefaultThreadLimitVal, NumThreads) + : NumThreads; } - - // If we have an enclosed teams directive but no num_teams clause we use - // the default value 0. - return Bld.getInt32(0); + return DefaultThreadLimitVal ? DefaultThreadLimitVal + : CGF.Builder.getInt32(0); } + if (isOpenMPSimdDirective(Dir->getDirectiveKind())) + return CGF.Builder.getInt32(1); + return DefaultThreadLimitVal; } - - // No teams associated with the directive. - return nullptr; + return DefaultThreadLimitVal ? DefaultThreadLimitVal + : CGF.Builder.getInt32(0); } /// Emit the number of threads for a target directive. Inspect the @@ -6560,98 +6707,179 @@ emitNumTeamsForTargetDirective(CGOpenMPRuntime &OMPRuntime, /// /// Otherwise, return nullptr. static llvm::Value * -emitNumThreadsForTargetDirective(CGOpenMPRuntime &OMPRuntime, - CodeGenFunction &CGF, +emitNumThreadsForTargetDirective(CodeGenFunction &CGF, const OMPExecutableDirective &D) { - assert(!CGF.getLangOpts().OpenMPIsDevice && "Clauses associated with the " - "teams directive expected to be " - "emitted only for the host!"); - + assert(!CGF.getLangOpts().OpenMPIsDevice && + "Clauses associated with the teams directive expected to be emitted " + "only for the host!"); + OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind(); + assert(isOpenMPTargetExecutionDirective(DirectiveKind) && + "Expected target-based executable directive."); CGBuilderTy &Bld = CGF.Builder; - - // - // If the target directive is combined with a teams directive: - // Return the value in the thread_limit clause, if any. - // - // If the target directive is combined with a parallel directive: - // Return the value in the num_threads clause, if any. - // - // If both clauses are set, select the minimum of the two. - // - // If neither teams or parallel combined directives set the number of threads - // in a team, return 0 to denote the runtime default. - // - // If this is not a teams directive return nullptr. - - if (isOpenMPTeamsDirective(D.getDirectiveKind()) || - isOpenMPParallelDirective(D.getDirectiveKind())) { - llvm::Value *DefaultThreadLimitVal = Bld.getInt32(0); - llvm::Value *NumThreadsVal = nullptr; - llvm::Value *ThreadLimitVal = nullptr; - - if (const auto *ThreadLimitClause = - D.getSingleClause()) { + llvm::Value *ThreadLimitVal = nullptr; + llvm::Value *NumThreadsVal = nullptr; + switch (DirectiveKind) { + case OMPD_target: { + const CapturedStmt *CS = D.getInnermostCapturedStmt(); + if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) + return NumThreads; + const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( + CGF.getContext(), CS->getCapturedStmt()); + if (const auto *Dir = dyn_cast_or_null(Child)) { + if (Dir->hasClausesOfKind()) { + CGOpenMPInnerExprInfo CGInfo(CGF, *CS); + CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); + const auto *ThreadLimitClause = + Dir->getSingleClause(); + CodeGenFunction::LexicalScope Scope( + CGF, ThreadLimitClause->getThreadLimit()->getSourceRange()); + if (const auto *PreInit = + cast_or_null(ThreadLimitClause->getPreInitStmt())) { + for (const auto *I : PreInit->decls()) { + if (!I->hasAttr()) { + CGF.EmitVarDecl(cast(*I)); + } else { + CodeGenFunction::AutoVarEmission Emission = + CGF.EmitAutoVarAlloca(cast(*I)); + CGF.EmitAutoVarCleanups(Emission); + } + } + } + llvm::Value *ThreadLimit = CGF.EmitScalarExpr( + ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); + ThreadLimitVal = + Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/true); + } + if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) && + !isOpenMPDistributeDirective(Dir->getDirectiveKind())) { + CS = Dir->getInnermostCapturedStmt(); + const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( + CGF.getContext(), CS->getCapturedStmt()); + Dir = dyn_cast_or_null(Child); + } + if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) && + !isOpenMPSimdDirective(Dir->getDirectiveKind())) { + CS = Dir->getInnermostCapturedStmt(); + if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) + return NumThreads; + } + if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind())) + return Bld.getInt32(1); + } + return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); + } + case OMPD_target_teams: { + if (D.hasClausesOfKind()) { CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); - llvm::Value *ThreadLimit = - CGF.EmitScalarExpr(ThreadLimitClause->getThreadLimit(), - /*IgnoreResultAssign*/ true); - ThreadLimitVal = Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, - /*IsSigned=*/true); + const auto *ThreadLimitClause = D.getSingleClause(); + llvm::Value *ThreadLimit = CGF.EmitScalarExpr( + ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); + ThreadLimitVal = + Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/true); } - - if (const auto *NumThreadsClause = - D.getSingleClause()) { + const CapturedStmt *CS = D.getInnermostCapturedStmt(); + if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) + return NumThreads; + const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild( + CGF.getContext(), CS->getCapturedStmt()); + if (const auto *Dir = dyn_cast_or_null(Child)) { + if (Dir->getDirectiveKind() == OMPD_distribute) { + CS = Dir->getInnermostCapturedStmt(); + if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal)) + return NumThreads; + } + } + return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); + } + case OMPD_target_teams_distribute: + if (D.hasClausesOfKind()) { + CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); + const auto *ThreadLimitClause = D.getSingleClause(); + llvm::Value *ThreadLimit = CGF.EmitScalarExpr( + ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); + ThreadLimitVal = + Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/true); + } + return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal); + case OMPD_target_parallel: + case OMPD_target_parallel_for: + case OMPD_target_parallel_for_simd: + case OMPD_target_teams_distribute_parallel_for: + case OMPD_target_teams_distribute_parallel_for_simd: + if (D.hasClausesOfKind()) { + CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF); + const auto *ThreadLimitClause = D.getSingleClause(); + llvm::Value *ThreadLimit = CGF.EmitScalarExpr( + ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true); + ThreadLimitVal = + Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*IsSigned=*/true); + } + if (D.hasClausesOfKind()) { CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF); - llvm::Value *NumThreads = - CGF.EmitScalarExpr(NumThreadsClause->getNumThreads(), - /*IgnoreResultAssign*/ true); + const auto *NumThreadsClause = D.getSingleClause(); + llvm::Value *NumThreads = CGF.EmitScalarExpr( + NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true); NumThreadsVal = Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*IsSigned=*/true); - } - - // Select the lesser of thread_limit and num_threads. - if (NumThreadsVal) ThreadLimitVal = ThreadLimitVal - ? Bld.CreateSelect(Bld.CreateICmpSLT(NumThreadsVal, + ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal, ThreadLimitVal), NumThreadsVal, ThreadLimitVal) : NumThreadsVal; - - // Set default value passed to the runtime if either teams or a target - // parallel type directive is found but no clause is specified. - if (!ThreadLimitVal) - ThreadLimitVal = DefaultThreadLimitVal; - - return ThreadLimitVal; - } - - // If the current target region has a teams region enclosed, we need to get - // the thread limit to pass to the runtime function call. This is done - // by generating the expression in a inlined region. This is required because - // the expression is captured in the enclosing target environment when the - // teams directive is not combined with target. - - const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target); - - if (const auto *TeamsDir = dyn_cast_or_null( - ignoreCompoundStmts(CS.getCapturedStmt()))) { - if (isOpenMPTeamsDirective(TeamsDir->getDirectiveKind())) { - if (const auto *TLE = TeamsDir->getSingleClause()) { - CGOpenMPInnerExprInfo CGInfo(CGF, CS); - CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo); - llvm::Value *ThreadLimit = CGF.EmitScalarExpr(TLE->getThreadLimit()); - return CGF.Builder.CreateIntCast(ThreadLimit, CGF.Int32Ty, - /*IsSigned=*/true); - } - - // If we have an enclosed teams directive but no thread_limit clause we - // use the default value 0. - return CGF.Builder.getInt32(0); } + return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0); + case OMPD_target_teams_distribute_simd: + case OMPD_target_simd: + return Bld.getInt32(1); + case OMPD_parallel: + case OMPD_for: + case OMPD_parallel_for: + case OMPD_parallel_sections: + case OMPD_for_simd: + case OMPD_parallel_for_simd: + case OMPD_cancel: + case OMPD_cancellation_point: + case OMPD_ordered: + case OMPD_threadprivate: + case OMPD_allocate: + case OMPD_task: + case OMPD_simd: + case OMPD_sections: + case OMPD_section: + case OMPD_single: + case OMPD_master: + case OMPD_critical: + case OMPD_taskyield: + case OMPD_barrier: + case OMPD_taskwait: + case OMPD_taskgroup: + case OMPD_atomic: + case OMPD_flush: + case OMPD_teams: + case OMPD_target_data: + case OMPD_target_exit_data: + case OMPD_target_enter_data: + case OMPD_distribute: + case OMPD_distribute_simd: + case OMPD_distribute_parallel_for: + case OMPD_distribute_parallel_for_simd: + case OMPD_teams_distribute: + case OMPD_teams_distribute_simd: + case OMPD_teams_distribute_parallel_for: + case OMPD_teams_distribute_parallel_for_simd: + case OMPD_target_update: + case OMPD_declare_simd: + case OMPD_declare_target: + case OMPD_end_declare_target: + case OMPD_declare_reduction: + case OMPD_declare_mapper: + case OMPD_taskloop: + case OMPD_taskloop_simd: + case OMPD_requires: + case OMPD_unknown: + break; } - - // No teams associated with the directive. - return nullptr; + llvm_unreachable("Unsupported directive kind."); } namespace { @@ -8174,70 +8402,17 @@ static void emitOffloadingArraysArgument( } } -/// Checks if the expression is constant or does not have non-trivial function -/// calls. -static bool isTrivial(ASTContext &Ctx, const Expr * E) { - // We can skip constant expressions. - // We can skip expressions with trivial calls or simple expressions. - return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || - !E->hasNonTrivialCall(Ctx)) && - !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); -} - -/// Checks if the \p Body is the \a CompoundStmt and returns its child statement -/// iff there is only one that is not evaluatable at the compile time. -static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body) { - if (const auto *C = dyn_cast(Body)) { - const Stmt *Child = nullptr; - for (const Stmt *S : C->body()) { - if (const auto *E = dyn_cast(S)) { - if (isTrivial(Ctx, E)) - continue; - } - // Some of the statements can be ignored. - if (isa(S) || isa(S) || isa(S) || - isa(S) || isa(S)) - continue; - // Analyze declarations. - if (const auto *DS = dyn_cast(S)) { - if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { - if (isa(D) || isa(D) || - isa(D) || isa(D) || - isa(D) || isa(D) || - isa(D) || - isa(D) || - isa(D)) - return true; - const auto *VD = dyn_cast(D); - if (!VD) - return false; - return VD->isConstexpr() || - ((VD->getType().isTrivialType(Ctx) || - VD->getType()->isReferenceType()) && - (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); - })) - continue; - } - // Found multiple children - cannot get the one child only. - if (Child) - return Body; - Child = S; - } - if (Child) - return Child; - } - return Body; -} - /// Check for inner distribute directive. static const OMPExecutableDirective * getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { const auto *CS = D.getInnermostCapturedStmt(); const auto *Body = CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); - const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body); + const Stmt *ChildStmt = + CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); - if (const auto *NestedDir = dyn_cast(ChildStmt)) { + if (const auto *NestedDir = + dyn_cast_or_null(ChildStmt)) { OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); switch (D.getDirectiveKind()) { case OMPD_target: @@ -8248,8 +8423,9 @@ getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) { /*IgnoreCaptured=*/true); if (!Body) return nullptr; - ChildStmt = getSingleCompoundChild(Ctx, Body); - if (const auto *NND = dyn_cast(ChildStmt)) { + ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body); + if (const auto *NND = + dyn_cast_or_null(ChildStmt)) { DKind = NND->getDirectiveKind(); if (isOpenMPDistributeDirective(DKind)) return NND; @@ -8406,8 +8582,8 @@ void CGOpenMPRuntime::emitTargetCall(CodeGenFunction &CGF, // Return value of the runtime offloading call. llvm::Value *Return; - llvm::Value *NumTeams = emitNumTeamsForTargetDirective(*this, CGF, D); - llvm::Value *NumThreads = emitNumThreadsForTargetDirective(*this, CGF, D); + llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D); + llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D); bool HasNowait = D.hasClausesOfKind(); // The target region is an outlined function launched by the runtime diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index 7b2c0f1b91..42dc4d473b 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -772,6 +772,11 @@ public: virtual ~CGOpenMPRuntime() {} virtual void clear(); + /// Checks if the \p Body is the \a CompoundStmt and returns its child + /// statement iff there is only one that is not evaluatable at the compile + /// time. + static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body); + /// Get the platform-specific name separator. std::string getName(ArrayRef Parts) const; diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 632bca6ff9..5a8f50bf31 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -714,61 +714,6 @@ getDataSharingMode(CodeGenModule &CGM) { : CGOpenMPRuntimeNVPTX::Generic; } -/// Checks if the expression is constant or does not have non-trivial function -/// calls. -static bool isTrivial(ASTContext &Ctx, const Expr * E) { - // We can skip constant expressions. - // We can skip expressions with trivial calls or simple expressions. - return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) || - !E->hasNonTrivialCall(Ctx)) && - !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true); -} - -/// Checks if the \p Body is the \a CompoundStmt and returns its child statement -/// iff there is only one that is not evaluatable at the compile time. -static const Stmt *getSingleCompoundChild(ASTContext &Ctx, const Stmt *Body) { - if (const auto *C = dyn_cast(Body)) { - const Stmt *Child = nullptr; - for (const Stmt *S : C->body()) { - if (const auto *E = dyn_cast(S)) { - if (isTrivial(Ctx, E)) - continue; - } - // Some of the statements can be ignored. - if (isa(S) || isa(S) || isa(S) || - isa(S) || isa(S)) - continue; - // Analyze declarations. - if (const auto *DS = dyn_cast(S)) { - if (llvm::all_of(DS->decls(), [&Ctx](const Decl *D) { - if (isa(D) || isa(D) || - isa(D) || isa(D) || - isa(D) || isa(D) || - isa(D) || - isa(D) || - isa(D) || isa(D)) - return true; - const auto *VD = dyn_cast(D); - if (!VD) - return false; - return VD->isConstexpr() || - ((VD->getType().isTrivialType(Ctx) || - VD->getType()->isReferenceType()) && - (!VD->hasInit() || isTrivial(Ctx, VD->getInit()))); - })) - continue; - } - // Found multiple children - cannot get the one child only. - if (Child) - return Body; - Child = S; - } - if (Child) - return Child; - } - return Body; -} - /// Check if the parallel directive has an 'if' clause with non-constant or /// false condition. Also, check if the number of threads is strictly specified /// and run those directives in non-SPMD mode. @@ -794,9 +739,10 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, const auto *CS = D.getInnermostCapturedStmt(); const auto *Body = CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); - const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body); + const Stmt *ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); - if (const auto *NestedDir = dyn_cast(ChildStmt)) { + if (const auto *NestedDir = + dyn_cast_or_null(ChildStmt)) { OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); switch (D.getDirectiveKind()) { case OMPD_target: @@ -808,8 +754,9 @@ static bool hasNestedSPMDDirective(ASTContext &Ctx, /*IgnoreCaptured=*/true); if (!Body) return false; - ChildStmt = getSingleCompoundChild(Ctx, Body); - if (const auto *NND = dyn_cast(ChildStmt)) { + ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); + if (const auto *NND = + dyn_cast_or_null(ChildStmt)) { DKind = NND->getDirectiveKind(); if (isOpenMPParallelDirective(DKind) && !hasParallelIfNumThreadsClause(Ctx, *NND)) @@ -971,9 +918,10 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, const auto *CS = D.getInnermostCapturedStmt(); const auto *Body = CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true); - const Stmt *ChildStmt = getSingleCompoundChild(Ctx, Body); + const Stmt *ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); - if (const auto *NestedDir = dyn_cast(ChildStmt)) { + if (const auto *NestedDir = + dyn_cast_or_null(ChildStmt)) { OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind(); switch (D.getDirectiveKind()) { case OMPD_target: @@ -986,8 +934,9 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, /*IgnoreCaptured=*/true); if (!Body) return false; - ChildStmt = getSingleCompoundChild(Ctx, Body); - if (const auto *NND = dyn_cast(ChildStmt)) { + ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); + if (const auto *NND = + dyn_cast_or_null(ChildStmt)) { DKind = NND->getDirectiveKind(); if (isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND)) @@ -998,8 +947,9 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, /*IgnoreCaptured=*/true); if (!Body) return false; - ChildStmt = getSingleCompoundChild(Ctx, Body); - if (const auto *NND = dyn_cast(ChildStmt)) { + ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); + if (const auto *NND = + dyn_cast_or_null(ChildStmt)) { DKind = NND->getDirectiveKind(); if (isOpenMPParallelDirective(DKind) && isOpenMPWorksharingDirective(DKind) && @@ -1010,8 +960,9 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, /*IgnoreCaptured=*/true); if (!Body) return false; - ChildStmt = getSingleCompoundChild(Ctx, Body); - if (const auto *NND = dyn_cast(ChildStmt)) { + ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); + if (const auto *NND = + dyn_cast_or_null(ChildStmt)) { DKind = NND->getDirectiveKind(); if (isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND)) @@ -1031,8 +982,9 @@ static bool hasNestedLightweightDirective(ASTContext &Ctx, /*IgnoreCaptured=*/true); if (!Body) return false; - ChildStmt = getSingleCompoundChild(Ctx, Body); - if (const auto *NND = dyn_cast(ChildStmt)) { + ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body); + if (const auto *NND = + dyn_cast_or_null(ChildStmt)) { DKind = NND->getDirectiveKind(); if (isOpenMPWorksharingDirective(DKind) && isOpenMPLoopDirective(DKind) && hasStaticScheduling(*NND)) @@ -2014,11 +1966,11 @@ getDistributeLastprivateVars(ASTContext &Ctx, const OMPExecutableDirective &D, "expected teams directive."); const OMPExecutableDirective *Dir = &D; if (!isOpenMPDistributeDirective(D.getDirectiveKind())) { - if (const Stmt *S = getSingleCompoundChild( + if (const Stmt *S = CGOpenMPRuntime::getSingleCompoundChild( Ctx, D.getInnermostCapturedStmt()->getCapturedStmt()->IgnoreContainers( /*IgnoreCaptured=*/true))) { - Dir = dyn_cast(S); + Dir = dyn_cast_or_null(S); if (Dir && !isOpenMPDistributeDirective(Dir->getDirectiveKind())) Dir = nullptr; } diff --git a/test/OpenMP/distribute_simd_reduction_codegen.cpp b/test/OpenMP/distribute_simd_reduction_codegen.cpp index 85b0e80aad..63fb75e000 100644 --- a/test/OpenMP/distribute_simd_reduction_codegen.cpp +++ b/test/OpenMP/distribute_simd_reduction_codegen.cpp @@ -46,7 +46,7 @@ int main() { // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( [&]() { // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( - // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1) // LAMBDA: call void @[[LOFFL1:.+]]( // LAMBDA: ret #pragma omp target @@ -123,7 +123,7 @@ int main() { } // CHECK: define {{.*}}i{{[0-9]+}} @main() -// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1) // CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}}) // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]() // CHECK: ret diff --git a/test/OpenMP/target_map_codegen.cpp b/test/OpenMP/target_map_codegen.cpp index b267a72b4c..44d7ffc9cc 100644 --- a/test/OpenMP/target_map_codegen.cpp +++ b/test/OpenMP/target_map_codegen.cpp @@ -259,7 +259,7 @@ void implicit_maps_nested_integer (int a){ // CK4: define internal void [[KERNELP1]](i32* {{[^,]+}}, i32* {{[^,]+}}, i32* {{[^,]+}}) #pragma omp parallel { - // CK4-DAG: call i32 @__tgt_target(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}) + // CK4-DAG: call i32 @__tgt_target_teams(i64 {{.+}}, i8* {{.+}}, i32 1, i8** [[BPGEP:%[0-9]+]], i8** [[PGEP:%[0-9]+]], {{.+}}[[SIZES]]{{.+}}, {{.+}}[[TYPES]]{{.+}}, i32 1, i32 0) // CK4-DAG: [[BPGEP]] = getelementptr inbounds {{.+}}[[BPS:%[^,]+]], i32 0, i32 0 // CK4-DAG: [[PGEP]] = getelementptr inbounds {{.+}}[[PS:%[^,]+]], i32 0, i32 0 // CK4-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BPS]], i32 0, i32 0 @@ -1516,7 +1516,7 @@ void explicit_maps_single (int ii){ int b = a; // Region 00n - // CK19-DAG: call i32 @__tgt_target(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00n]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00n]]{{.+}}) + // CK19-DAG: call i32 @__tgt_target_teams(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 1, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[SIZE00n]], {{.+}}getelementptr {{.+}}[1 x i{{.+}}]* [[MTYPE00n]]{{.+}}, i32 1, i32 0) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] diff --git a/test/OpenMP/target_simd_codegen.cpp b/test/OpenMP/target_simd_codegen.cpp index 13504213c5..6a9ac90fe7 100644 --- a/test/OpenMP/target_simd_codegen.cpp +++ b/test/OpenMP/target_simd_codegen.cpp @@ -110,7 +110,7 @@ int foo(int n) { double cn[5][n]; TT d; - // CHECK: [[RET:%.+]] = call i32 @__tgt_target_nowait(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null) + // CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 1, i32 1) // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] @@ -128,7 +128,7 @@ int foo(int n) { a += 1; } - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT2]], i32 0, i32 0)) + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT2]], i32 0, i32 0), i32 1, i32 1) // CHECK-DAG: [[BP]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[P]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BPR]], i32 0, i32 0 @@ -165,7 +165,7 @@ int foo(int n) { // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10 // CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CHECK: [[IFTHEN]] - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0)) + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i32 1, i32 1) // CHECK-DAG: [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0 @@ -217,7 +217,7 @@ int foo(int n) { // CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]] // CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8 - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0)) + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i32 1, i32 1) // CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[SR]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S:%[^,]+]], i32 0, i32 0 @@ -489,7 +489,7 @@ int bar(int n){ // CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]] // CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2 -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 6, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MAPT7]], i32 0, i32 0)) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 6, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MAPT7]], i32 0, i32 0), i32 1, i32 1) // CHECK-DAG: [[BPR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P:%.+]], i32 0, i32 0 // CHECK-DAG: [[SR]] = getelementptr inbounds [6 x i[[SZ]]], [6 x i[[SZ]]]* [[S:%.+]], i32 0, i32 0 @@ -564,7 +564,7 @@ int bar(int n){ // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50 // CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CHECK: [[IFTHEN]] -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0)) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([4 x i[[SZ]]], [4 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([4 x i64], [4 x i64]* [[MAPT6]], i32 0, i32 0), i32 1, i32 1) // CHECK-DAG: [[BPR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[P:%.+]], i32 0, i32 0 @@ -614,7 +614,7 @@ int bar(int n){ // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40 // CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CHECK: [[IFTHEN]] -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0)) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET5]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT5]], i32 0, i32 0), i32 1, i32 1) // CHECK-DAG: [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0 diff --git a/test/OpenMP/target_simd_depend_codegen.cpp b/test/OpenMP/target_simd_depend_codegen.cpp index 37f726aa2a..8a9ed02b50 100644 --- a/test/OpenMP/target_simd_depend_codegen.cpp +++ b/test/OpenMP/target_simd_depend_codegen.cpp @@ -194,7 +194,7 @@ int foo(int n) { // CHECK: [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0 // CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]], // CHECK: [[DEVICE:%.+]] = sext i32 [[DEV]] to i64 -// CHECK: [[RET:%.+]] = call i32 @__tgt_target(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null) +// CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 1, i32 1) // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] @@ -211,7 +211,7 @@ int foo(int n) { // CHECK: [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2 // CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]], // CHECK: [[DEVICE:%.+]] = sext i32 [[DEV]] to i64 -// CHECK: [[RET:%.+]] = call i32 @__tgt_target_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0) +// CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i32 1, i32 1) // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] diff --git a/test/OpenMP/target_teams_distribute_simd_codegen.cpp b/test/OpenMP/target_teams_distribute_simd_codegen.cpp index 0a5482e9b1..4e7d7d3c8d 100644 --- a/test/OpenMP/target_teams_distribute_simd_codegen.cpp +++ b/test/OpenMP/target_teams_distribute_simd_codegen.cpp @@ -152,7 +152,7 @@ int foo(int n) { a += 1; } - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT2]], i32 0, i32 0), i32 0, i32 0) + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** [[BP:%[^,]+]], i8** [[P:%[^,]+]], i[[SZ]]* getelementptr inbounds ([1 x i[[SZ]]], [1 x i[[SZ]]]* [[SIZET2]], i32 0, i32 0), i64* getelementptr inbounds ([1 x i64], [1 x i64]* [[MAPT2]], i32 0, i32 0), i32 0, i32 1) // CHECK-DAG: [[BP]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[P]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[PR:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[BPADDR0:%.+]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[BPR]], i32 0, i32 [[IDX0:[0-9]+]] @@ -176,7 +176,7 @@ int foo(int n) { // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 10 // CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CHECK: [[IFTHEN]] - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i32 0, i32 0) + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([2 x i[[SZ]]], [2 x i[[SZ]]]* [[SIZET3]], i32 0, i32 0), i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT3]], i32 0, i32 0), i32 0, i32 1) // CHECK-DAG: [[BPR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[P:%[^,]+]], i32 0, i32 0 @@ -227,7 +227,7 @@ int foo(int n) { // CHECK: [[CNELEMSIZE2:%.+]] = mul nuw i[[SZ]] 5, [[VLA1:%.+]] // CHECK: [[CNSIZE:%.+]] = mul nuw i[[SZ]] [[CNELEMSIZE2]], 8 - // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i32 0, i32 0) + // CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 9, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([9 x i64], [9 x i64]* [[MAPT4]], i32 0, i32 0), i32 0, i32 1) // CHECK-DAG: [[BPR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[BP:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [9 x i8*], [9 x i8*]* [[P:%[^,]+]], i32 0, i32 0 // CHECK-DAG: [[SR]] = getelementptr inbounds [9 x i[[SZ]]], [9 x i[[SZ]]]* [[S:%[^,]+]], i32 0, i32 0 @@ -564,7 +564,7 @@ int bar(int n){ // CHECK: [[CELEMSIZE2:%.+]] = mul nuw i[[SZ]] 2, [[VLA0:%.+]] // CHECK: [[CSIZE:%.+]] = mul nuw i[[SZ]] [[CELEMSIZE2]], 2 -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 6, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MAPT5]], i32 0, i32 0), i32 0, i32 0) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 6, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SR:%[^,]+]], i64* getelementptr inbounds ([6 x i64], [6 x i64]* [[MAPT5]], i32 0, i32 0), i32 0, i32 1) // CHECK-DAG: [[BPR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [6 x i8*], [6 x i8*]* [[P:%.+]], i32 0, i32 0 // CHECK-DAG: [[SR]] = getelementptr inbounds [6 x i[[SZ]]], [6 x i[[SZ]]]* [[S:%.+]], i32 0, i32 0 @@ -639,7 +639,7 @@ int bar(int n){ // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 50 // CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CHECK: [[IFTHEN]] -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([5 x i[[SZ]]], [5 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT6]], i32 0, i32 0), i32 0, i32 0) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([5 x i[[SZ]]], [5 x i[[SZ]]]* [[SIZET6]], i32 0, i32 0), i64* getelementptr inbounds ([5 x i64], [5 x i64]* [[MAPT6]], i32 0, i32 0), i32 0, i32 1) // CHECK-DAG: [[BPR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[P:%.+]], i32 0, i32 0 @@ -696,7 +696,7 @@ int bar(int n){ // CHECK: [[IF:%.+]] = icmp sgt i32 {{[^,]+}}, 40 // CHECK: br i1 [[IF]], label %[[IFTHEN:[^,]+]], label %[[IFELSE:[^,]+]] // CHECK: [[IFTHEN]] -// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET7]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT7]], i32 0, i32 0), i32 0, i32 0) +// CHECK-DAG: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* getelementptr inbounds ([3 x i[[SZ]]], [3 x i[[SZ]]]* [[SIZET7]], i32 0, i32 0), i64* getelementptr inbounds ([3 x i64], [3 x i64]* [[MAPT7]], i32 0, i32 0), i32 0, i32 1) // CHECK-DAG: [[BPR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[BP:%.+]], i32 0, i32 0 // CHECK-DAG: [[PR]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[P:%.+]], i32 0, i32 0 diff --git a/test/OpenMP/target_teams_distribute_simd_depend_codegen.cpp b/test/OpenMP/target_teams_distribute_simd_depend_codegen.cpp index 0516979f8c..21055ea4b1 100644 --- a/test/OpenMP/target_teams_distribute_simd_depend_codegen.cpp +++ b/test/OpenMP/target_teams_distribute_simd_depend_codegen.cpp @@ -194,7 +194,7 @@ int foo(int n) { // CHECK: [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 0 // CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]], // CHECK: [[DEVICE:%.+]] = sext i32 [[DEV]] to i64 -// CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 0, i32 0) +// CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i[[SZ]]* null, i64* null, i32 0, i32 1) // CHECK-NEXT: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] // CHECK: [[FAIL]] @@ -211,7 +211,7 @@ int foo(int n) { // CHECK: [[DEVICE_CAP:%.+]] = getelementptr inbounds %{{.+}}, %{{.+}}* %{{.+}}, i32 0, i32 2 // CHECK: [[DEV:%.+]] = load i32, i32* [[DEVICE_CAP]], // CHECK: [[DEVICE:%.+]] = sext i32 [[DEV]] to i64 -// CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i32 0, i32 0) +// CHECK: [[RET:%.+]] = call i32 @__tgt_target_teams_nowait(i64 [[DEVICE]], i8* @{{[^,]+}}, i32 2, i8** [[BPR:%[^,]+]], i8** [[PR:%[^,]+]], i[[SZ]]* [[SZT]], i64* getelementptr inbounds ([2 x i64], [2 x i64]* [[MAPT]], i32 0, i32 0), i32 0, i32 1) // CHECK: [[ERROR:%.+]] = icmp ne i32 [[RET]], 0 // CHECK-NEXT: br i1 [[ERROR]], label %[[FAIL:[^,]+]], label %[[END:[^,]+]] diff --git a/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp b/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp index b2b911e888..463a6afd2d 100644 --- a/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp +++ b/test/OpenMP/target_teams_distribute_simd_firstprivate_codegen.cpp @@ -85,7 +85,7 @@ int main() { // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( [&]() { // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( - // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1) // LAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}}) // LAMBDA: ret #pragma omp target teams distribute simd firstprivate(g, g1, sivar) @@ -164,7 +164,7 @@ int main() { } // CHECK: define {{.*}}i{{[0-9]+}} @main() -// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1) // CHECK: call void @[[OFFL1:.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]() // CHECK: ret @@ -225,8 +225,8 @@ int main() { // CHECK-DAG: [[VAR_ADDR_REF:%.+]] = load{{.+}} [[VAR_ADDR]], // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 -// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* +// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* // CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}}) // firstprivate(s_arr) @@ -258,7 +258,7 @@ int main() { // CHECK: ret void // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]() -// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1) // CHECK: call void @[[TOFFL1:.+]]({{[^,]+}}, {{[^,]+}}, {{[^,]+}}, {{[^,]+}}) // CHECK: ret @@ -311,8 +311,8 @@ int main() { // CHECK: [[S_ARR_ADDR_REF:%.+]] = load [2 x [[S_INT_TY]]]*, [2 x [[S_INT_TY]]]** [[S_ARR_ADDR]], // firstprivate vec(vec): copy from *_addr into priv1 and then from priv1 into priv2 -// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* -// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* +// CHECK-DAG: [[VEC_DEST_PRIV:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_PRIV]] to i8* +// CHECK-DAG: [[VEC_SRC:%.+]] = bitcast [2 x i{{[0-9]+}}]* [[VEC_ADDR_VAL]] to i8* // CHECK: call void @llvm.memcpy.{{.+}}(i8* align {{[0-9]+}} [[VEC_DEST_PRIV]], i8* align {{[0-9]+}} [[VEC_SRC]], {{.+}}) // firstprivate(s_arr) diff --git a/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp b/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp index 4fa4d04c70..402c83921d 100644 --- a/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp +++ b/test/OpenMP/target_teams_distribute_simd_private_codegen.cpp @@ -84,7 +84,7 @@ int main() { // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( [&]() { // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( - // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 0) + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 1) // LAMBDA: call void @[[LOFFL1:.+]]() // LAMBDA: ret #pragma omp target teams distribute simd private(g, g1, sivar) @@ -151,7 +151,7 @@ int main() { } // CHECK: define {{.*}}i{{[0-9]+}} @main() -// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 0) +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 1) // CHECK: call void @[[OFFL1:.+]]() // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]() // CHECK: ret diff --git a/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp b/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp index c533bb30f8..a850238af5 100644 --- a/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp +++ b/test/OpenMP/target_teams_distribute_simd_reduction_codegen.cpp @@ -46,7 +46,7 @@ int main() { // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( [&]() { // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( - // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1) // LAMBDA: call void @[[LOFFL1:.+]]( // LAMBDA: ret #pragma omp target teams distribute simd reduction(+: sivar) @@ -124,7 +124,7 @@ int main() { // CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer // CHECK: define {{.*}}i{{[0-9]+}} @main() -// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1) // CHECK: call void @[[OFFL1:.+]](i32* {{.+}}) // CHECK: [[RES:%.+]] = call{{.*}} i32 @[[TMAIN_INT:[^(]+]]() // CHECK: ret i32 [[RES]] diff --git a/test/OpenMP/teams_distribute_simd_codegen.cpp b/test/OpenMP/teams_distribute_simd_codegen.cpp index c89a936b64..ab1482855e 100644 --- a/test/OpenMP/teams_distribute_simd_codegen.cpp +++ b/test/OpenMP/teams_distribute_simd_codegen.cpp @@ -39,7 +39,7 @@ int teams_argument_global(int n) { // CK1: [[TE_PAR:%.+]] = load{{.+}}, {{.+}} [[TE_CAST]], // CK1: [[TH_PAR:%.+]] = load{{.+}}, {{.+}} [[TH_CAST]], - // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}}) + // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 1) // CK1: call void @[[OFFL1:.+]](i{{32|64}} [[TE_PAR]], i{{32|64}} [[TH_PAR]], #pragma omp target @@ -48,7 +48,7 @@ int teams_argument_global(int n) { a[i] = 0; } - // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // CK1: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 2, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1) // CK1: call void @[[OFFL2:.+]](i{{64|32}} %{{.+}}) #pragma omp target {{{ @@ -119,7 +119,7 @@ int teams_local_arg(void) { int n = 100; int a[n]; - // CK2: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // CK2: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1) // CK2: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}}) #pragma omp target #pragma omp teams distribute simd @@ -135,7 +135,7 @@ int teams_local_arg(void) { // CK2: define internal void @[[OUTL1]]({{.+}}) // CK2: call void @__kmpc_for_static_init_4( // CK2: call void @__kmpc_for_static_fini( - // CK2: ret void + // CK2: ret void return a[0]; } @@ -168,7 +168,7 @@ struct SS{ // CK3: define {{.*}}i32 @{{.+}}foo{{.+}}( int foo(void) { - // CK3: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // CK3: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1) // CK3: call void @[[OFFL1:.+]]([[SSI]]* %{{.+}}) #pragma omp target #pragma omp teams distribute simd @@ -184,7 +184,7 @@ struct SS{ // CK3: define internal void @[[OUTL1]]({{.+}}) // CK3: call void @__kmpc_for_static_init_4( // CK3: call void @__kmpc_for_static_fini( - // CK3: ret void + // CK3: ret void return a[0]; } @@ -241,7 +241,7 @@ int main (int argc, char **argv) { } // CK4: define {{.*}}i32 @{{[^,]+}}(i{{.+}}{{.+}} %[[ARGC:.+]], {{.+}}) -// CK4: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CK4: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}, i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1) // CK4: call void @[[OFFL1:.+]]({{.+}}) // CK4: {{%.+}} = call{{.*}} i32 @[[TMAIN:.+]]({{.+}}) // CK4: ret @@ -256,7 +256,7 @@ int main (int argc, char **argv) { // CK4: ret void // CK4: define {{.*}}i32 @[[TMAIN]]({{.+}}) -// CK4: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 {{.+}}) +// CK4: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 {{.+}}, i32 1) // CK4: call void @[[OFFLT:.+]]({{.+}}) // CK4: ret // CK4-NEXT: } diff --git a/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp b/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp index 27320191fa..ec0b006a37 100644 --- a/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp +++ b/test/OpenMP/teams_distribute_simd_firstprivate_codegen.cpp @@ -86,7 +86,7 @@ int main() { // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( [&]() { // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( - // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 3, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1) // LAMBDA: call void @[[LOFFL1:.+]](i{{64|32}} %{{.+}}) // LAMBDA: ret #pragma omp target @@ -167,7 +167,7 @@ int main() { } // CHECK: define {{.*}}i{{[0-9]+}} @main() -// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 5, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1) // CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}}) // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]() // CHECK: ret @@ -261,7 +261,7 @@ int main() { // CHECK: ret void // CHECK: define{{.*}} i{{[0-9]+}} @[[TMAIN_INT]]() -// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 4, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1) // CHECK: call void @[[TOFFL1:.+]](i{{64|32}} %{{.+}}) // CHECK: ret diff --git a/test/OpenMP/teams_distribute_simd_private_codegen.cpp b/test/OpenMP/teams_distribute_simd_private_codegen.cpp index fc7000a699..10ad66ad31 100644 --- a/test/OpenMP/teams_distribute_simd_private_codegen.cpp +++ b/test/OpenMP/teams_distribute_simd_private_codegen.cpp @@ -85,7 +85,7 @@ int main() { // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( [&]() { // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( - // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1) // LAMBDA: call void @[[LOFFL1:.+]]( // LAMBDA: ret #pragma omp target @@ -155,7 +155,7 @@ int main() { } // CHECK: define {{.*}}i{{[0-9]+}} @main() -// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 0) +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 0, i8** null, i8** null, i{{64|32}}* null, i64* null, i32 0, i32 1) // CHECK: call void @[[OFFL1:.+]]() // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]() // CHECK: ret diff --git a/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp b/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp index 10901dea12..2137913bcd 100644 --- a/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp +++ b/test/OpenMP/teams_distribute_simd_reduction_codegen.cpp @@ -47,7 +47,7 @@ int main() { // LAMBDA: call void [[OUTER_LAMBDA:@.+]]( [&]() { // LAMBDA: define{{.*}} internal{{.*}} void [[OUTER_LAMBDA]]( - // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) + // LAMBDA: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1) // LAMBDA: call void @[[LOFFL1:.+]]( // LAMBDA: ret #pragma omp target @@ -128,7 +128,7 @@ int main() { // CHECK: [[RED_VAR:@.+]] = common global [8 x {{.+}}] zeroinitializer // CHECK: define {{.*}}i{{[0-9]+}} @main() -// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 0) +// CHECK: call i32 @__tgt_target_teams(i64 -1, i8* @{{[^,]+}}, i32 1, i8** %{{[^,]+}}, i8** %{{[^,]+}}, i{{64|32}}* {{.+}}@{{[^,]+}}, i32 0, i32 0), i64* {{.+}}@{{[^,]+}}, i32 0, i32 0), i32 0, i32 1) // CHECK: call void @[[OFFL1:.+]](i{{64|32}} %{{.+}}) // CHECK: {{%.+}} = call{{.*}} i32 @[[TMAIN_INT:.+]]() // CHECK: ret