#include "CGOpenMPRuntime.h"
#include "CodeGenFunction.h"
+#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/Decl.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/DerivedTypes.h"
using namespace clang;
using namespace CodeGen;
+/// \brief API for captured statement code generation in OpenMP constructs.
+///
+/// Ties a captured statement to the OpenMP directive it belongs to and to the
+/// variable that holds the global thread id inside the region.
+class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
+public:
+  /// \param D OpenMP directive associated with the region; kept by reference.
+  /// \param CS Captured statement of the directive.
+  /// \param ThreadIDVar Variable or parameter storing the global thread id;
+  /// must not be null.
+  CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &CS,
+                     const VarDecl *ThreadIDVar)
+      : CGCapturedStmtInfo(CS, CR_OpenMP), ThreadIDVar(ThreadIDVar),
+        Directive(D) {
+    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
+  }
+
+  // 'override' already implies 'virtual', so only 'override' is spelled out
+  // on the overriding members below.
+  ~CGOpenMPRegionInfo() override {}
+
+  /// \brief Gets a variable or parameter for storing global thread id
+  /// inside OpenMP construct.
+  const VarDecl *getThreadIDVariable() const { return ThreadIDVar; }
+
+  /// \brief Gets an LValue for the current ThreadID variable.
+  LValue getThreadIDVariableLValue(CodeGenFunction &CGF);
+
+  /// \brief Returns true if \a Info was created with the CR_OpenMP kind.
+  static bool classof(const CGCapturedStmtInfo *Info) {
+    return Info->getKind() == CR_OpenMP;
+  }
+
+  /// \brief Emit the captured statement body.
+  void EmitBody(CodeGenFunction &CGF, Stmt *S) override;
+
+  /// \brief Get the name of the capture helper.
+  StringRef getHelperName() const override { return ".omp_outlined."; }
+
+private:
+  /// \brief A variable or parameter storing global thread id for OpenMP
+  /// constructs.
+  const VarDecl *ThreadIDVar;
+  /// \brief OpenMP executable directive associated with the region. Stored as
+  /// a reference, so the directive has to outlive this object.
+  const OMPExecutableDirective &Directive;
+};
+
+LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
+  // Address of the local variable that holds the thread id in this region.
+  auto ThreadIDAddr = CGF.GetAddrOfLocalVar(ThreadIDVar);
+  // The lvalue is formed with a pointer to the declared type of the variable.
+  QualType ThreadIDPtrTy =
+      CGF.getContext().getPointerType(ThreadIDVar->getType());
+  return CGF.MakeNaturalAlignAddrLValue(ThreadIDAddr, ThreadIDPtrTy);
+}
+
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, Stmt *S) {
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
CGF.EmitOMPFirstprivateClause(Directive, PrivateScope);
KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
}
+llvm::Value *
+CGOpenMPRuntime::EmitOpenMPOutlinedFunction(const OMPExecutableDirective &D,
+                                            const VarDecl *ThreadIDVar) {
+  // The statement associated with the directive must be a CapturedStmt.
+  const auto *Captured = cast<CapturedStmt>(D.getAssociatedStmt());
+  // Generate the function with a separate CodeGenFunction whose captured
+  // statement info carries the directive and the thread id variable.
+  CodeGenFunction OutlinedCGF(CGM, true);
+  CGOpenMPRegionInfo RegionInfo(D, *Captured, ThreadIDVar);
+  OutlinedCGF.CapturedStmtInfo = &RegionInfo;
+  return OutlinedCGF.GenerateCapturedStmtFunction(*Captured);
+}
+
llvm::Value *
CGOpenMPRuntime::GetOrCreateDefaultOpenMPLocation(OpenMPLocationFlags Flags) {
llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
assert(CGF.CurFn && "No function in current CodeGenFunction.");
llvm::Value *LocValue = nullptr;
- OpenMPLocMapTy::iterator I = OpenMPLocMap.find(CGF.CurFn);
- if (I != OpenMPLocMap.end()) {
- LocValue = I->second;
+ OpenMPLocThreadIDMapTy::iterator I = OpenMPLocThreadIDMap.find(CGF.CurFn);
+ if (I != OpenMPLocThreadIDMap.end()) {
+ LocValue = I->second.DebugLoc;
} else {
// Generate "ident_t .kmpc_loc.addr;"
llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr");
AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy));
- OpenMPLocMap[CGF.CurFn] = AI;
+ auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+ Elem.second.DebugLoc = AI;
LocValue = AI;
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
assert(CGF.CurFn && "No function in current CodeGenFunction.");
llvm::Value *ThreadID = nullptr;
- OpenMPThreadIDMapTy::iterator I = OpenMPThreadIDMap.find(CGF.CurFn);
- if (I != OpenMPThreadIDMap.end()) {
- ThreadID = I->second;
- } else {
- // Check if current function is a function which has first parameter
- // with type int32 and name ".global_tid.".
- if (!CGF.CurFn->arg_empty() &&
- CGF.CurFn->arg_begin()->getType()->isPointerTy() &&
- CGF.CurFn->arg_begin()
- ->getType()
- ->getPointerElementType()
- ->isIntegerTy() &&
- CGF.CurFn->arg_begin()
- ->getType()
- ->getPointerElementType()
- ->getIntegerBitWidth() == 32 &&
- CGF.CurFn->arg_begin()->hasName() &&
- CGF.CurFn->arg_begin()->getName() == ".global_tid.") {
- CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
- CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
- ThreadID = CGF.Builder.CreateLoad(CGF.CurFn->arg_begin());
- } else {
- // Generate "int32 .kmpc_global_thread_num.addr;"
- CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
- CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
- llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc)};
- ThreadID = CGF.EmitRuntimeCall(
- CreateRuntimeFunction(OMPRTL__kmpc_global_thread_num), Args);
+ // Check whether we've already cached a load of the thread id in this
+ // function.
+ OpenMPLocThreadIDMapTy::iterator I = OpenMPLocThreadIDMap.find(CGF.CurFn);
+ if (I != OpenMPLocThreadIDMap.end()) {
+ ThreadID = I->second.ThreadID;
+ } else if (auto OMPRegionInfo =
+ dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
+ // Check if this is an outlined function with thread id passed as argument.
+ auto ThreadIDVar = OMPRegionInfo->getThreadIDVariable();
+ auto LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
+ auto RVal = CGF.EmitLoadOfLValue(LVal, Loc);
+ LVal = CGF.MakeNaturalAlignAddrLValue(RVal.getScalarVal(),
+ ThreadIDVar->getType());
+ ThreadID = CGF.EmitLoadOfLValue(LVal, Loc).getScalarVal();
+ // If value loaded in entry block, cache it and use it everywhere in
+ // function.
+ if (CGF.Builder.GetInsertBlock() == CGF.AllocaInsertPt->getParent()) {
+ auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+ Elem.second.ThreadID = ThreadID;
}
- OpenMPThreadIDMap[CGF.CurFn] = ThreadID;
+ } else {
+ // This is not an outlined function region - need to call kmp_int32
+ // __kmpc_global_thread_num(ident_t *loc).
+ // Generate thread id value and cache this value for use across the
+ // function.
+ CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
+ CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
+ llvm::Value *Args[] = {EmitOpenMPUpdateLocation(CGF, Loc)};
+ ThreadID = CGF.EmitRuntimeCall(
+ CreateRuntimeFunction(OMPRTL__kmpc_global_thread_num), Args);
+ auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
+ Elem.second.ThreadID = ThreadID;
}
return ThreadID;
}
void CGOpenMPRuntime::FunctionFinished(CodeGenFunction &CGF) {
assert(CGF.CurFn && "No function in current CodeGenFunction.");
- if (OpenMPThreadIDMap.count(CGF.CurFn))
- OpenMPThreadIDMap.erase(CGF.CurFn);
- if (OpenMPLocMap.count(CGF.CurFn))
- OpenMPLocMap.erase(CGF.CurFn);
+  // Drop the debug location and thread id cached for the finished function.
+  // Erasing a key that is not in the map does nothing, so a preceding count()
+  // lookup is not needed.
+  OpenMPLocThreadIDMap.erase(CGF.CurFn);
}
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
#ifndef LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
#define LLVM_CLANG_LIB_CODEGEN_CGOPENMPRUNTIME_H
-#include "CodeGenFunction.h"
-#include "clang/AST/StmtOpenMP.h"
-#include "clang/AST/Type.h"
+#include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/IR/Type.h"
-#include "llvm/IR/Value.h"
-namespace clang {
-
-namespace CodeGen {
-
-/// \brief API for captured statement code generation in OpenMP constructs.
-class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
-public:
- CGOpenMPRegionInfo(const OMPExecutableDirective &D, const CapturedStmt &S,
- const VarDecl *ThreadIDVar)
- : CGCapturedStmtInfo(S, CR_OpenMP), ThreadIDVar(ThreadIDVar),
- Directive(D) {}
-
- virtual ~CGOpenMPRegionInfo() override{};
+namespace llvm {
+class ArrayType;
+class Constant;
+class Function;
+class FunctionType;
+class StructType;
+class Type;
+class Value;
+} // namespace llvm
- /// \brief Gets a variable or parameter for storing global thread id
- /// inside OpenMP construct.
- const VarDecl *getThreadIDVariable() const { return ThreadIDVar; }
-
- static bool classof(const CGCapturedStmtInfo *Info) {
- return Info->getKind() == CR_OpenMP;
- }
+namespace clang {
- /// \brief Emit the captured statement body.
- virtual void EmitBody(CodeGenFunction &CGF, Stmt *S) override;
+class OMPExecutableDirective;
+class VarDecl;
- /// \brief Get the name of the capture helper.
- virtual StringRef getHelperName() const override { return ".omp_outlined."; }
+namespace CodeGen {
-private:
- /// \brief A variable or parameter storing global thread id for OpenMP
- /// constructs.
- const VarDecl *ThreadIDVar;
- /// \brief OpenMP executable directive associated with the region.
- const OMPExecutableDirective &Directive;
-};
+class CodeGenFunction;
+class CodeGenModule;
class CGOpenMPRuntime {
public:
/// \brief Default const ident_t object used for initialization of all other
/// ident_t objects.
llvm::Constant *DefaultOpenMPPSource;
- /// \brief Map of flags and corrsponding default locations.
+ /// \brief Map of flags and corresponding default locations.
typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDefaultLocMapTy;
OpenMPDefaultLocMapTy OpenMPDefaultLocMap;
llvm::Value *GetOrCreateDefaultOpenMPLocation(OpenMPLocationFlags Flags);
IdentField_PSource
};
llvm::StructType *IdentTy;
- /// \brief Map for Sourcelocation and OpenMP runtime library debug locations.
+ /// \brief Map for SourceLocation and OpenMP runtime library debug locations.
typedef llvm::DenseMap<unsigned, llvm::Value *> OpenMPDebugLocMapTy;
OpenMPDebugLocMapTy OpenMPDebugLocMap;
/// \brief The type for a microtask which gets passed to __kmpc_fork_call().
/// Original representation is:
/// typedef void (kmpc_micro)(kmp_int32 global_tid, kmp_int32 bound_tid,...);
llvm::FunctionType *Kmpc_MicroTy;
- /// \brief Map of local debug location and functions.
- typedef llvm::DenseMap<llvm::Function *, llvm::Value *> OpenMPLocMapTy;
- OpenMPLocMapTy OpenMPLocMap;
- /// \brief Map of local ThreadID and functions.
- typedef llvm::DenseMap<llvm::Function *, llvm::Value *> OpenMPThreadIDMapTy;
- OpenMPThreadIDMapTy OpenMPThreadIDMap;
+ /// \brief Stores debug location and ThreadID for the function.
+ ///
+ /// The two fields are assigned independently of each other, so an entry can
+ /// exist with only one of them set.
+ /// NOTE(review): lookups that only test whether the entry is present may
+ /// read a field that was never assigned (presumably null) - confirm that the
+ /// users of the map handle that case.
+ struct DebugLocThreadIdTy {
+ /// Address of the function-local ident_t object (".kmpc_loc.addr").
+ llvm::Value *DebugLoc;
+ /// Thread id value cached for reuse within the function.
+ llvm::Value *ThreadID;
+ };
+ /// \brief Map of local debug location, ThreadId and functions.
+ typedef llvm::DenseMap<llvm::Function *, DebugLocThreadIdTy>
+ OpenMPLocThreadIDMapTy;
+ OpenMPLocThreadIDMapTy OpenMPLocThreadIDMap;
/// \brief Type kmp_critical_name, originally defined as typedef kmp_int32
/// kmp_critical_name[8];
llvm::ArrayType *KmpCriticalNameTy;
llvm::StringMap<llvm::Value *, llvm::BumpPtrAllocator> CriticalRegionVarNames;
/// \brief Emits object of ident_t type with info for source location.
- /// \param CGF Reference to current CodeGenFunction.
- /// \param Loc Clang source location.
/// \param Flags Flags for OpenMP location.
///
llvm::Value *
llvm::Constant *CreateRuntimeFunction(OpenMPRTLFunction Function);
/// \brief Gets thread id value for the current thread.
- /// \param CGF Reference to current CodeGenFunction.
- /// \param Loc Clang source location.
///
llvm::Value *GetOpenMPThreadID(CodeGenFunction &CGF, SourceLocation Loc);
explicit CGOpenMPRuntime(CodeGenModule &CGM);
virtual ~CGOpenMPRuntime() {}
+ /// \brief Emits outlined function for the specified OpenMP directive \a D
+ /// (required for parallel and task directives). This outlined function has
+ /// type void(*)(kmp_int32 /*ThreadID*/, kmp_int32 /*BoundID*/, struct
+ /// context_vars*).
+ /// \param D OpenMP directive.
+ /// \param ThreadIDVar Variable for thread id in the current OpenMP region.
+ ///
+ virtual llvm::Value *
+ EmitOpenMPOutlinedFunction(const OMPExecutableDirective &D,
+ const VarDecl *ThreadIDVar);
+
/// \brief Cleans up references to the objects in finished function.
- /// \param CGF Reference to finished CodeGenFunction.
///
void FunctionFinished(CodeGenFunction &CGF);
/// \brief Emits code for parallel call of the \a OutlinedFn with variables
/// captured in a record which address is stored in \a CapturedStruct.
- /// \param CGF Reference to current CodeGenFunction.
- /// \param Loc Clang source location.
- /// \param OutlinedFn Outlined function to be run in parallel threads.
+ /// \param OutlinedFn Outlined function to be run in parallel threads. Type of
+ /// this function is void(*)(kmp_int32, kmp_int32, struct context_vars*).
/// \param CapturedStruct A pointer to the record with the references to
/// variables used in \a OutlinedFn function.
///
/// \brief Returns corresponding lock object for the specified critical region
/// name. If the lock object does not exist it is created, otherwise the
/// reference to the existing copy is returned.
+ /// \param CriticalName Name of the critical region.
+ ///
llvm::Value *GetCriticalRegionLock(StringRef CriticalName);
/// \brief Emits start of the critical region by calling void
/// __kmpc_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name
/// * \a RegionLock)
- /// \param CGF Reference to current CodeGenFunction.
/// \param RegionLock The lock object for critical region.
- /// \param Loc Location of the construct.
virtual void EmitOMPCriticalRegionStart(CodeGenFunction &CGF,
llvm::Value *RegionLock,
SourceLocation Loc);
/// \brief Emits end of the critical region by calling void
/// __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name
/// * \a RegionLock)
- /// \param CGF Reference to current CodeGenFunction.
/// \param RegionLock The lock object for critical region.
- /// \param Loc Location of the construct.
virtual void EmitOMPCriticalRegionEnd(CodeGenFunction &CGF,
llvm::Value *RegionLock,
SourceLocation Loc);
/// \brief Emits a barrier for OpenMP threads.
- /// \param CGF Reference to current CodeGenFunction.
- /// \param Loc Clang source location.
/// \param Flags Flags for the barrier.
///
virtual void EmitOMPBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
}
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
- const CapturedStmt *CS = cast<CapturedStmt>(S.getAssociatedStmt());
- llvm::Value *CapturedStruct = GenerateCapturedStmtArgument(*CS);
-
- llvm::Value *OutlinedFn;
- {
- CodeGenFunction CGF(CGM, true);
- CGOpenMPRegionInfo CGInfo(S, *CS, *CS->getCapturedDecl()->param_begin());
- CGF.CapturedStmtInfo = &CGInfo;
- OutlinedFn = CGF.GenerateCapturedStmtFunction(*CS);
- }
-
+  // The statement associated with the directive must be a CapturedStmt.
+  const auto *Captured = cast<CapturedStmt>(S.getAssociatedStmt());
+  llvm::Value *CapturedStruct = GenerateCapturedStmtArgument(*Captured);
+  // The first parameter of the captured declaration is passed on as the
+  // thread id variable of the outlined function.
+  const VarDecl *ThreadIDVar = *Captured->getCapturedDecl()->param_begin();
+  llvm::Value *OutlinedFn =
+      CGM.getOpenMPRuntime().EmitOpenMPOutlinedFunction(S, ThreadIDVar);
CGM.getOpenMPRuntime().EmitOMPParallelCall(*this, S.getLocStart(), OutlinedFn,
CapturedStruct);
}
bool isCXXThisExprCaptured() const { return CXXThisFieldDecl != nullptr; }
FieldDecl *getThisFieldDecl() const { return CXXThisFieldDecl; }
+ /// \brief Accepts any CGCapturedStmtInfo: the check unconditionally succeeds.
+ static bool classof(const CGCapturedStmtInfo *) { return true; }
+
/// \brief Emit the captured statement body.
virtual void EmitBody(CodeGenFunction &CGF, Stmt *S) {
RegionCounter Cnt = CGF.getPGORegionCounter(S);
// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_FLOAT_TY]]],
// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_FLOAT_TY]],
-// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}* [[GTID_ADDR]]
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
// CHECK: [[T_VAR_PTR_REF:%.+]] = getelementptr inbounds [[CAP_MAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1
// CHECK: [[T_VAR_REF:%.+]] = load i{{[0-9]+}}** [[T_VAR_PTR_REF]],
// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}* [[T_VAR_REF]],
// CHECK: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]])
// CHECK: call {{.*}} [[S_FLOAT_TY_COPY_CONSTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]], [[S_FLOAT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]])
// CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]])
+// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
+// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}* [[GTID_REF]]
// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
// CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]* [[VAR_PRIV]])
// CHECK-DAG: call {{.*}} [[S_FLOAT_TY_DESTR]]([[S_FLOAT_TY]]*
// CHECK: [[VEC_PRIV:%.+]] = alloca [2 x i{{[0-9]+}}],
// CHECK: [[S_ARR_PRIV:%.+]] = alloca [2 x [[S_INT_TY]]],
// CHECK: [[VAR_PRIV:%.+]] = alloca [[S_INT_TY]],
-// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}* [[GTID_ADDR]]
+// CHECK: store i{{[0-9]+}}* [[GTID_ADDR]], i{{[0-9]+}}** [[GTID_ADDR_ADDR:%.+]],
// CHECK: [[T_VAR_PTR_REF:%.+]] = getelementptr inbounds [[CAP_TMAIN_TY]]* %{{.+}}, i{{[0-9]+}} 0, i{{[0-9]+}} 1
// CHECK: [[T_VAR_REF:%.+]] = load i{{[0-9]+}}** [[T_VAR_PTR_REF]],
// CHECK: [[T_VAR_VAL:%.+]] = load i{{[0-9]+}}* [[T_VAR_REF]],
// CHECK: call {{.*}} [[ST_TY_DEFAULT_CONSTR]]([[ST_TY]]* [[ST_TY_TEMP:%.+]])
// CHECK: call {{.*}} [[S_INT_TY_COPY_CONSTR]]([[S_INT_TY]]* [[VAR_PRIV]], [[S_INT_TY]]* {{.*}} [[VAR_REF]], [[ST_TY]]* [[ST_TY_TEMP]])
// CHECK: call {{.*}} [[ST_TY_DESTR]]([[ST_TY]]* [[ST_TY_TEMP]])
+// CHECK: [[GTID_REF:%.+]] = load i{{[0-9]+}}** [[GTID_ADDR_ADDR]]
+// CHECK: [[GTID:%.+]] = load i{{[0-9]+}}* [[GTID_REF]]
// CHECK: call void @__kmpc_barrier(%{{.+}}* [[IMPLICIT_BARRIER_LOC]], i{{[0-9]+}} [[GTID]])
// CHECK-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]* [[VAR_PRIV]])
// CHECK-DAG: call {{.*}} [[S_INT_TY_DESTR]]([[S_INT_TY]]*