From cf45efc91832952f461ed71030b3d00e816cfd49 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Fri, 4 Aug 2017 19:46:10 +0000 Subject: [PATCH] [OPENMP][DEBUG] Set proper address space info if required by target. Arguments, passed to the outlined function, must have correct address space info for proper Debug info support. Patch sets global address space for arguments that are mapped and passed by reference. Also, cuda-gdb does not handle reference types correctly, so reference arguments are represented as pointers. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@310104 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Basic/Attr.td | 8 ++ include/clang/Sema/Sema.h | 5 + lib/CodeGen/CGOpenMPRuntime.h | 27 ++++++ lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 79 +++++++++++++++ lib/CodeGen/CGOpenMPRuntimeNVPTX.h | 26 +++++ lib/CodeGen/CGStmtOpenMP.cpp | 55 ++++++++--- lib/Sema/SemaExpr.cpp | 2 + lib/Sema/SemaOpenMP.cpp | 33 +++++++ .../nvptx_target_firstprivate_codegen.cpp | 29 +++--- test/OpenMP/target_parallel_debug_codegen.cpp | 97 +++++++++++++++++++ 10 files changed, 335 insertions(+), 26 deletions(-) create mode 100644 test/OpenMP/target_parallel_debug_codegen.cpp diff --git a/include/clang/Basic/Attr.td b/include/clang/Basic/Attr.td index 3de9e8e8dd..431905ece4 100644 --- a/include/clang/Basic/Attr.td +++ b/include/clang/Basic/Attr.td @@ -2685,6 +2685,14 @@ def OMPCaptureNoInit : InheritableAttr { let Documentation = [Undocumented]; } +def OMPCaptureKind : Attr { + // This attribute has no spellings as it is only ever created implicitly. + let Spellings = []; + let SemaHandler = 0; + let Args = [UnsignedArgument<"CaptureKind">]; + let Documentation = [Undocumented]; +} + def OMPDeclareSimdDecl : Attr { let Spellings = [Pragma<"omp", "declare simd">]; let Subjects = SubjectList<[Function]>; diff --git a/include/clang/Sema/Sema.h b/include/clang/Sema/Sema.h index e804a7962a..30fb56e338 100644 --- a/include/clang/Sema/Sema.h +++ b/include/clang/Sema/Sema.h @@ -8527,6 +8527,11 @@ public: /// is performed. bool isOpenMPPrivateDecl(ValueDecl *D, unsigned Level); + /// Sets OpenMP capture kind (OMPC_private, OMPC_firstprivate, OMPC_map etc.) + /// for \p FD based on DSA for the provided corresponding captured declaration + /// \p D. + void setOpenMPCaptureKind(FieldDecl *FD, ValueDecl *D, unsigned Level); + /// \brief Check if the specified variable is captured by 'target' directive. /// \param Level Relative level of nested OpenMP construct for that the check /// is performed. diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index 809d9fa0a9..c0682e2be9 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -1325,6 +1325,33 @@ public: virtual void emitDoacrossOrdered(CodeGenFunction &CGF, const OMPDependClause *C); + /// Translates argument of outlined function if this is required for target. + /// \param FD A field for the corresponding captured variable in the captured + /// record. + /// \param NativeParam Native parameter of the outlined function. + virtual const VarDecl *translateParameter(const FieldDecl *FD, + const VarDecl *NativeParam) const { + return NativeParam; + } + + typedef llvm::function_ref + MappingFnType; + /// Maps the native argument to the address of the target-specific argument. + /// \param FD A field for the corresponding captured variable in the captured + /// record. + /// \param NativeParam Native parameter of the outlined function. 
+  /// \param TargetParam Target-specific parameter of the outlined function
+  /// (provided by the \a CGOpenMPRuntime::translateParameter).
+  /// \param MapFn Mapping function that maps the address of the \p NativeParam
+  /// to the address of the \p TargetParam.
+  virtual void mapParameterAddress(CodeGenFunction &CGF, const FieldDecl *FD,
+                                   const VarDecl *NativeParam,
+                                   const VarDecl *TargetParam,
+                                   const MappingFnType MapFn) const {
+    assert(NativeParam == TargetParam &&
+           "native and target args must be the same");
+  }
+
   /// Emits call of the outlined function with the provided arguments,
   /// translating these arguments to correct target-specific arguments.
   virtual void
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
index d6a8257796..4f39e7c2ca 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp
@@ -2238,3 +2238,82 @@ void CGOpenMPRuntimeNVPTX::emitReduction(
   CGF.EmitBranch(DefaultBB);
   CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
 }
+
+const VarDecl *
+CGOpenMPRuntimeNVPTX::translateParameter(const FieldDecl *FD,
+                                         const VarDecl *NativeParam) const {
+  if (!NativeParam->getType()->isReferenceType())
+    return NativeParam;
+  QualType ArgType = NativeParam->getType();
+  QualifierCollector QC;
+  const Type *NonQualTy = QC.strip(ArgType);
+  QualType PointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType();
+  if (const auto *Attr = FD->getAttr<OMPCaptureKindAttr>()) {
+    if (Attr->getCaptureKind() == OMPC_map) {
+      PointeeTy = CGM.getContext().getAddrSpaceQualType(PointeeTy,
+                                                        LangAS::opencl_global);
+    }
+  }
+  ArgType = CGM.getContext().getPointerType(PointeeTy);
+  QC.addRestrict();
+  enum { NVPTX_local_addr = 5 };
+  QC.addAddressSpace(NVPTX_local_addr);
+  ArgType = QC.apply(CGM.getContext(), ArgType);
+  return ImplicitParamDecl::Create(
+      CGM.getContext(), /*DC=*/nullptr, NativeParam->getLocation(),
+      NativeParam->getIdentifier(), ArgType, ImplicitParamDecl::Other);
+}
+
+void CGOpenMPRuntimeNVPTX::mapParameterAddress(
+    CodeGenFunction &CGF, const FieldDecl *FD, const VarDecl *NativeParam,
+    const VarDecl *TargetParam,
+    const CGOpenMPRuntime::MappingFnType MapFn) const {
+  if (!NativeParam->getType()->isReferenceType() || NativeParam == TargetParam)
+    return;
+  assert(NativeParam != TargetParam &&
+         "Native arg must not be the same as target arg.");
+  Address LocalAddr = CGF.GetAddrOfLocalVar(TargetParam);
+  QualType NativeParamType = NativeParam->getType();
+  QualifierCollector QC;
+  const Type *NonQualTy = QC.strip(NativeParamType);
+  QualType NativePointeeTy = cast<ReferenceType>(NonQualTy)->getPointeeType();
+  unsigned NativePointeeAddrSpace =
+      NativePointeeTy.getQualifiers().getAddressSpace();
+  QualType TargetPointeeTy = TargetParam->getType()->getPointeeType();
+  llvm::Value *TargetAddr = CGF.EmitLoadOfScalar(
+      LocalAddr, /*Volatile=*/false, TargetPointeeTy, SourceLocation());
+  // First cast to generic.
+  TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+      TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo(
+                      /*AddrSpace=*/0));
+  // Cast from generic to native address space.
+  TargetAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+      TargetAddr, TargetAddr->getType()->getPointerElementType()->getPointerTo(
+                      NativePointeeAddrSpace));
+  Address NativeParamAddr = CGF.CreateMemTemp(NativeParamType);
+  CGF.EmitStoreOfScalar(TargetAddr, NativeParamAddr, /*Volatile=*/false,
+                        NativeParam->getType());
+  MapFn(CGF, NativeParam, NativeParamAddr);
+}
+
+void CGOpenMPRuntimeNVPTX::emitOutlinedFunctionCall(
+    CodeGenFunction &CGF, llvm::Value *OutlinedFn,
+    ArrayRef<llvm::Value *> Args) const {
+  SmallVector<llvm::Value *, 4> TargetArgs;
+  auto *FnType =
+      cast<llvm::FunctionType>(OutlinedFn->getType()->getPointerElementType());
+  for (unsigned I = 0, E = Args.size(); I < E; ++I) {
+    llvm::Type *TargetType = FnType->getParamType(I);
+    llvm::Value *NativeArg = Args[I];
+    if (!TargetType->isPointerTy()) {
+      TargetArgs.emplace_back(NativeArg);
+      continue;
+    }
+    llvm::Value *TargetArg = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
+        NativeArg, NativeArg->getType()->getPointerElementType()->getPointerTo(
+                       /*AddrSpace=*/0));
+    TargetArgs.emplace_back(
+        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TargetArg, TargetType));
+  }
+  CGOpenMPRuntime::emitOutlinedFunctionCall(CGF, OutlinedFn, TargetArgs);
+}
diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
index ae25e94759..66b12d69a8 100644
--- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
+++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h
@@ -268,6 +268,32 @@ public:
   /// \return Specified function.
   llvm::Constant *createNVPTXRuntimeFunction(unsigned Function);
 
+  /// Translates argument of outlined function if this is required for target.
+  /// \param FD A field for the corresponding captured variable in the captured
+  /// record.
+  /// \param NativeParam Native parameter of the outlined function.
+  const VarDecl *translateParameter(const FieldDecl *FD,
+                                    const VarDecl *NativeParam) const override;
+
+  /// Maps the native argument to the address of the target-specific argument.
+  /// \param FD A field for the corresponding captured variable in the captured
+  /// record.
+  /// \param NativeParam Native parameter of the outlined function.
+  /// \param TargetParam Target-specific parameter of the outlined function
+  /// (provided by the \a CGOpenMPRuntime::translateParameter).
+  /// \param MapFn Mapping function that maps the address of the \p NativeParam
+  /// to the address of the \p TargetParam.
+  void mapParameterAddress(CodeGenFunction &CGF, const FieldDecl *FD,
+                           const VarDecl *NativeParam,
+                           const VarDecl *TargetParam,
+                           const MappingFnType MapFn) const override;
+
+  /// Emits call of the outlined function with the provided arguments,
+  /// translating these arguments to correct target-specific arguments.
+  void emitOutlinedFunctionCall(
+      CodeGenFunction &CGF, llvm::Value *OutlinedFn,
+      ArrayRef<llvm::Value *> Args = llvm::None) const override;
+
   /// Target codegen is specialized based on two programming models: the
   /// 'generic' fork-join model of OpenMP, and a more GPU efficient 'spmd'
   /// model for constructs like 'target parallel' that support it.
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp
index 436a04fbeb..e7e5976e60 100644
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -252,12 +252,15 @@ namespace {
   bool RegisterCastedArgsOnly = false;
   /// Name of the generated function.
   StringRef FunctionName;
+  /// Function that maps given variable declaration to the specified address.
+ const CGOpenMPRuntime::MappingFnType MapFn; explicit FunctionOptions(const CapturedStmt *S, bool UIntPtrCastRequired, bool RegisterCastedArgsOnly, - StringRef FunctionName) + StringRef FunctionName, + const CGOpenMPRuntime::MappingFnType MapFn) : S(S), UIntPtrCastRequired(UIntPtrCastRequired), RegisterCastedArgsOnly(UIntPtrCastRequired && RegisterCastedArgsOnly), - FunctionName(FunctionName) {} + FunctionName(FunctionName), MapFn(MapFn) {} }; } @@ -276,9 +279,13 @@ static std::pair emitOutlinedFunctionPrologue( // Build the argument list. CodeGenModule &CGM = CGF.CGM; ASTContext &Ctx = CGM.getContext(); + FunctionArgList TargetArgs; bool HasUIntPtrArgs = false; Args.append(CD->param_begin(), std::next(CD->param_begin(), CD->getContextParamPosition())); + TargetArgs.append( + CD->param_begin(), + std::next(CD->param_begin(), CD->getContextParamPosition())); auto I = FO.S->captures().begin(); for (auto *FD : RD->fields()) { QualType ArgType = FD->getType(); @@ -308,19 +315,28 @@ static std::pair emitOutlinedFunctionPrologue( } if (ArgType->isVariablyModifiedType()) ArgType = getCanonicalParamType(Ctx, ArgType.getNonReferenceType()); - Args.push_back(ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, - FD->getLocation(), II, ArgType, - ImplicitParamDecl::Other)); + auto *Arg = + ImplicitParamDecl::Create(Ctx, /*DC=*/nullptr, FD->getLocation(), II, + ArgType, ImplicitParamDecl::Other); + Args.emplace_back(Arg); + // Do not cast arguments if we emit function with non-original types. + TargetArgs.emplace_back( + FO.UIntPtrCastRequired + ? Arg + : CGM.getOpenMPRuntime().translateParameter(FD, Arg)); ++I; } Args.append( std::next(CD->param_begin(), CD->getContextParamPosition() + 1), CD->param_end()); + TargetArgs.append( + std::next(CD->param_begin(), CD->getContextParamPosition() + 1), + CD->param_end()); // Create the function declaration. FunctionType::ExtInfo ExtInfo; const CGFunctionInfo &FuncInfo = - CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, Args); + CGM.getTypes().arrangeBuiltinFunctionDeclaration(Ctx.VoidTy, TargetArgs); llvm::FunctionType *FuncLLVMTy = CGM.getTypes().GetFunctionType(FuncInfo); llvm::Function *F = @@ -331,16 +347,19 @@ static std::pair emitOutlinedFunctionPrologue( F->setDoesNotThrow(); // Generate the function. - CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, Args, CD->getLocation(), + CGF.StartFunction(CD, Ctx.VoidTy, F, FuncInfo, TargetArgs, CD->getLocation(), CD->getBody()->getLocStart()); unsigned Cnt = CD->getContextParamPosition(); I = FO.S->captures().begin(); for (auto *FD : RD->fields()) { + // Do not map arguments if we emit function with non-original types. + CGM.getOpenMPRuntime().mapParameterAddress(CGF, FD, Args[Cnt], + TargetArgs[Cnt], FO.MapFn); + Address LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]); // If we are capturing a pointer by copy we don't need to do anything, just // use the value that we get from the arguments. if (I->capturesVariableByCopy() && FD->getType()->isAnyPointerType()) { const VarDecl *CurVD = I->getCapturedVar(); - Address LocalAddr = CGF.GetAddrOfLocalVar(Args[Cnt]); // If the variable is a reference we need to materialize it here. 
if (CurVD->getType()->isReferenceType()) { Address RefAddr = CGF.CreateMemTemp( @@ -357,8 +376,8 @@ static std::pair emitOutlinedFunctionPrologue( } LValueBaseInfo BaseInfo(AlignmentSource::Decl, false); - LValue ArgLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(Args[Cnt]), - Args[Cnt]->getType(), BaseInfo); + LValue ArgLVal = + CGF.MakeAddrLValue(LocalAddr, Args[Cnt]->getType(), BaseInfo); if (FD->hasCapturedVLAType()) { if (FO.UIntPtrCastRequired) { ArgLVal = CGF.MakeAddrLValue(castValueFromUintptr(CGF, FD->getType(), @@ -428,8 +447,15 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { FunctionArgList Args; llvm::DenseMap> LocalAddrs; llvm::DenseMap> VLASizes; - FunctionOptions FO(&S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false, - CapturedStmtInfo->getHelperName()); + FunctionOptions FO( + &S, !NeedWrapperFunction, /*RegisterCastedArgsOnly=*/false, + CapturedStmtInfo->getHelperName(), + [NeedWrapperFunction](CodeGenFunction &CGF, const VarDecl *VD, + Address Addr) { + assert(NeedWrapperFunction && "Function should not be called if " + "wrapper function is not required."); + CGF.setAddrOfLocalVar(VD, Addr); + }); llvm::Function *F; bool HasUIntPtrArgs; std::tie(F, HasUIntPtrArgs) = emitOutlinedFunctionPrologue( @@ -452,7 +478,10 @@ CodeGenFunction::GenerateOpenMPCapturedStmtFunction(const CapturedStmt &S) { llvm::raw_svector_ostream Out(Buffer); Out << "__nondebug_wrapper_" << CapturedStmtInfo->getHelperName(); FunctionOptions WrapperFO(&S, /*UIntPtrCastRequired=*/true, - /*RegisterCastedArgsOnly=*/true, Out.str()); + /*RegisterCastedArgsOnly=*/true, Out.str(), + [](CodeGenFunction &, const VarDecl *, Address) { + llvm_unreachable("Function should not be called"); + }); CodeGenFunction WrapperCGF(CGM, /*suppressNewContext=*/true); WrapperCGF.disableDebugInfo(); Args.clear(); diff --git a/lib/Sema/SemaExpr.cpp b/lib/Sema/SemaExpr.cpp index 28b9419111..e482337df2 100644 --- a/lib/Sema/SemaExpr.cpp +++ b/lib/Sema/SemaExpr.cpp @@ -14013,6 +14013,8 @@ static bool captureInCapturedRegion(CapturedRegionScopeInfo *RSI, Field->setImplicit(true); Field->setAccess(AS_private); RD->addDecl(Field); + if (S.getLangOpts().OpenMP && RSI->CapRegionKind == CR_OpenMP) + S.setOpenMPCaptureKind(Field, Var, RSI->OpenMPLevel); CopyExpr = new (S.Context) DeclRefExpr(Var, RefersToCapturedVariable, DeclRefType, VK_LValue, Loc); diff --git a/lib/Sema/SemaOpenMP.cpp b/lib/Sema/SemaOpenMP.cpp index 72a7daa919..8394a51531 100644 --- a/lib/Sema/SemaOpenMP.cpp +++ b/lib/Sema/SemaOpenMP.cpp @@ -1327,6 +1327,39 @@ bool Sema::isOpenMPPrivateDecl(ValueDecl *D, unsigned Level) { DSAStack->isTaskgroupReductionRef(D, Level)); } +void Sema::setOpenMPCaptureKind(FieldDecl *FD, ValueDecl *D, unsigned Level) { + assert(LangOpts.OpenMP && "OpenMP is not allowed"); + D = getCanonicalDecl(D); + OpenMPClauseKind OMPC = OMPC_unknown; + for (unsigned I = DSAStack->getNestingLevel() + 1; I > Level; --I) { + const unsigned NewLevel = I - 1; + if (DSAStack->hasExplicitDSA(D, + [&OMPC](const OpenMPClauseKind K) { + if (isOpenMPPrivate(K)) { + OMPC = K; + return true; + } + return false; + }, + NewLevel)) + break; + if (DSAStack->checkMappableExprComponentListsForDeclAtLevel( + D, NewLevel, + [](OMPClauseMappableExprCommon::MappableExprComponentListRef, + OpenMPClauseKind) { return true; })) { + OMPC = OMPC_map; + break; + } + if (DSAStack->hasExplicitDirective(isOpenMPTargetExecutionDirective, + NewLevel)) { + OMPC = OMPC_firstprivate; + break; + } + } + if (OMPC != OMPC_unknown) + 
FD->addAttr(OMPCaptureKindAttr::CreateImplicit(Context, OMPC)); +} + bool Sema::isOpenMPTargetCapturedDecl(ValueDecl *D, unsigned Level) { assert(LangOpts.OpenMP && "OpenMP is not allowed"); // Return true if the current level is no longer enclosed in a target region. diff --git a/test/OpenMP/nvptx_target_firstprivate_codegen.cpp b/test/OpenMP/nvptx_target_firstprivate_codegen.cpp index 5dcff8e548..a3abbb5e3c 100644 --- a/test/OpenMP/nvptx_target_firstprivate_codegen.cpp +++ b/test/OpenMP/nvptx_target_firstprivate_codegen.cpp @@ -1,9 +1,9 @@ // Test target codegen - host bc file has to be created first. // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 +// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-64 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 +// RUN: %clang_cc1 -debug-info-kind=limited -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix TCHECK --check-prefix TCHECK-32 // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -24,13 +24,15 @@ int foo(int n, double *ptr) { double c[5][10]; TT d; - #pragma omp target firstprivate(a) + #pragma omp target firstprivate(a) map(tofrom: b) { + b[a] = a; } - // TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]]) + // TCHECK: define {{.*}}void @__omp_offloading_{{.+}}([10 x float] addrspace(1)* noalias [[B_IN:%.+]], i{{[0-9]+}} [[A_IN:%.+]]) // TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK-NOT: alloca i{{[0-9]+}}, + // TCHECK-64: call void @llvm.dbg.declare(metadata [10 x float] addrspace(1)** %{{.+}}, metadata !{{[0-9]+}}, metadata ![[LOCAL:[0-9]+]]) // TCHECK: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]], // TCHECK: ret void @@ -45,7 +47,7 @@ int foo(int n, double *ptr) { // make sure that firstprivate variables are generated in all cases and that we use those instances for operations inside the // target region - // TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A2_IN:%.+]], [10 x float]* {{.+}} [[B_IN:%.+]], [5 x [10 x double]]* {{.+}} [[C_IN:%.+]], [[TT]]* {{.+}} [[D_IN:%.+]]) + // TCHECK: define {{.*}}void @__omp_offloading_{{.+}}(i{{[0-9]+}}{{.*}} [[A2_IN:%.+]], [10 x float]*{{.*}} [[B_IN:%.+]], [5 x [10 x double]]*{{.*}} [[C_IN:%.+]], [[TT]]*{{.*}} [[D_IN:%.+]]) // TCHECK: [[A2_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[B_ADDR:%.+]] = alloca [10 x float]*, // TCHECK: [[C_ADDR:%.+]] = alloca [5 x [10 x double]]*, @@ -58,10 +60,12 @@ int foo(int n, double *ptr) { // TCHECK: store [10 x float]* [[B_IN]], [10 x float]** [[B_ADDR]], 
// TCHECK: store [5 x [10 x double]]* [[C_IN]], [5 x [10 x double]]** [[C_ADDR]], // TCHECK: store [[TT]]* [[D_IN]], [[TT]]** [[D_ADDR]], - // TCHECK: [[CONV_A2ADDR:%.+]] = bitcast i{{[0-9]+}}* [[A2_ADDR]] to i{{[0-9]+}}* // TCHECK: [[B_ADDR_REF:%.+]] = load [10 x float]*, [10 x float]** [[B_ADDR]], + // TCHECK: [[B_ADDR_REF:%.+]] = load [10 x float]*, [10 x float]** % // TCHECK: [[C_ADDR_REF:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** [[C_ADDR]], + // TCHECK: [[C_ADDR_REF:%.+]] = load [5 x [10 x double]]*, [5 x [10 x double]]** % // TCHECK: [[D_ADDR_REF:%.+]] = load [[TT]]*, [[TT]]** [[D_ADDR]], + // TCHECK: [[D_ADDR_REF:%.+]] = load [[TT]]*, [[TT]]** % // firstprivate(aa): a_priv = a_in @@ -80,7 +84,7 @@ int foo(int n, double *ptr) { // TCHECK: [[D_IN_BCAST:%.+]] = bitcast [[TT]]* [[D_ADDR_REF]] to i8* // TCHECK: call void @llvm.memcpy.{{.+}}(i8* [[D_PRIV_BCAST]], i8* [[D_IN_BCAST]],{{.+}}) - // TCHECK: load i16, i16* [[CONV_A2ADDR]], + // TCHECK: load i16, i16* [[A2_ADDR]], #pragma omp target firstprivate(ptr) @@ -129,7 +133,7 @@ int fstatic(int n) { return a; } -// TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]], i{{[0-9]+}} [[A3_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]]) +// TCHECK: define {{.*}}void @__omp_offloading_{{.+}}(i{{[0-9]+}}{{.*}} [[A_IN:%.+]], i{{[0-9]+}}{{.*}} [[A3_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]]) // TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[A3_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*, @@ -138,9 +142,8 @@ int fstatic(int n) { // TCHECK: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]], // TCHECK: store i{{[0-9]+}} [[A3_IN]], i{{[0-9]+}}* [[A3_ADDR]], // TCHECK: store [10 x i{{[0-9]+}}]* [[B_IN]], [10 x i{{[0-9]+}}]** [[B_ADDR]], -// TCHECK-64: [[A_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A_ADDR]] to i{{[0-9]+}}* -// TCHECK: [[A3_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A3_ADDR]] to i8* // TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** [[B_ADDR]], +// TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** % // firstprivate(a): a_priv = a_in @@ -169,7 +172,7 @@ struct S1 { return (int)b; } - // TCHECK: define void @__omp_offloading_{{.+}}([[S1]]* [[TH:%.+]], i{{[0-9]+}} [[B_IN:%.+]]) + // TCHECK: define internal void @__omp_offloading_{{.+}}([[S1]]* [[TH:%.+]], i{{[0-9]+}} [[B_IN:%.+]]) // TCHECK: [[TH_ADDR:%.+]] = alloca [[S1]]*, // TCHECK: [[B_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK-NOT: alloca i{{[0-9]+}}, @@ -200,15 +203,15 @@ int bar(int n, double *ptr){ // template -// TCHECK: define void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]]) +// TCHECK: define internal void @__omp_offloading_{{.+}}(i{{[0-9]+}} [[A_IN:%.+]], [10 x i{{[0-9]+}}]*{{.+}} [[B_IN:%.+]]) // TCHECK: [[A_ADDR:%.+]] = alloca i{{[0-9]+}}, // TCHECK: [[B_ADDR:%.+]] = alloca [10 x i{{[0-9]+}}]*, // TCHECK-NOT: alloca i{{[0-9]+}}, // TCHECK: [[B_PRIV:%.+]] = alloca [10 x i{{[0-9]+}}], // TCHECK: store i{{[0-9]+}} [[A_IN]], i{{[0-9]+}}* [[A_ADDR]], // TCHECK: store [10 x i{{[0-9]+}}]* [[B_IN]], [10 x i{{[0-9]+}}]** [[B_ADDR]], -// TCHECK-64: [[A_ADDR_CONV:%.+]] = bitcast i{{[0-9]+}}* [[A_ADDR]] to i{{[0-9]+}}* // TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** [[B_ADDR]], +// TCHECK: [[B_ADDR_REF:%.+]] = load [10 x i{{[0-9]+}}]*, [10 x i{{[0-9]+}}]** % // firstprivate(a) // TCHECK-NOT: store i{{[0-9]+}} %{{.+}}, i{{[0-9]+}}* diff --git 
a/test/OpenMP/target_parallel_debug_codegen.cpp b/test/OpenMP/target_parallel_debug_codegen.cpp new file mode 100644 index 0000000000..41ef09cfde --- /dev/null +++ b/test/OpenMP/target_parallel_debug_codegen.cpp @@ -0,0 +1,97 @@ +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -debug-info-kind=limited | FileCheck %s +// expected-no-diagnostics + +int main() { + /* int(*b)[a]; */ + /* int *(**c)[a]; */ + int a; + int b[10][10]; + int c[10][10][10]; +#pragma omp target parallel firstprivate(a, b) map(tofrom \ + : c) + { + int &f = c[1][1][1]; + int &g = a; + int &h = b[1][1]; + int d = 15; + a = 5; + b[0][a] = 10; + c[0][0][a] = 11; + b[0][a] = c[0][0][a]; + } +#pragma omp target parallel firstprivate(a) map(tofrom \ + : c, b) + { + int &f = c[1][1][1]; + int &g = a; + int &h = b[1][1]; + int d = 15; + a = 5; + b[0][a] = 10; + c[0][0][a] = 11; + b[0][a] = c[0][0][a]; + } +#pragma omp target parallel map(tofrom \ + : a, c, b) + { + int &f = c[1][1][1]; + int &g = a; + int &h = b[1][1]; + int d = 15; + a = 5; + b[0][a] = 10; + c[0][0][a] = 11; + b[0][a] = c[0][0][a]; + } + return 0; +} + +// CHECK: define internal void @__omp_offloading{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* +// CHECK: call void [[NONDEBUG_WRAPPER:.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* {{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* {{[^)]+}}) + +// CHECK: define internal void [[DEBUG_PARALLEL:@.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* noalias{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* + +// CHECK: define internal void [[NONDEBUG_WRAPPER]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* +// CHECK: call void [[DEBUG_PARALLEL]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^)]+}}) + +// CHECK: define void @__nondebug_wrapper___omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* +// CHECK: call void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]]* {{[^)]+}}) + +// CHECK: define internal void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* noalias {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias {{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* +// CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]* +// CHECK: call void [[NONDEBUG_WRAPPER:.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* {{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* {{[^)]+}}) + 
+// CHECK: define internal void [[DEBUG_PARALLEL:@.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* +// CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]* + +// CHECK: define internal void [[NONDEBUG_WRAPPER]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* +// CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)* +// CHECK: call void [[DEBUG_PARALLEL]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^)]+}}) + +// CHECK: define void @__nondebug_wrapper___omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]]* dereferenceable{{[^,]+}}, i64 {{[^,]+}}, [10 x [10 x i32]]* dereferenceable{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* +// CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)* +// CHECK: call void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^)]+}}) + +// CHECK: define void @__omp_offloading_{{[^(]+}}([10 x [10 x [10 x i32]]] addrspace(1)* noalias {{[^,]+}}, i32 addrspace(1)* noalias {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias {{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* +// CHECK: addrspacecast i32 addrspace(1)* %{{.+}} to i32* +// CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]* +// CHECK: addrspacecast [10 x [10 x [10 x i32]]]* %{{.+}} to [10 x [10 x [10 x i32]]] addrspace(1)* +// CHECK: addrspacecast i32* %{{.+}} to i32 addrspace(1)* +// CHECK: addrspacecast [10 x [10 x i32]]* %{{.+}} to [10 x [10 x i32]] addrspace(1)* +// CHECK: call void [[DEBUG_PARALLEL:@.+]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* {{[^,]+}}, i32 addrspace(1)* {{[^,]+}}, [10 x [10 x i32]] addrspace(1)* {{[^)]+}}) + +// CHECK: define internal void [[DEBUG_PARALLEL]](i32* {{[^,]+}}, i32* {{[^,]+}}, [10 x [10 x [10 x i32]]] addrspace(1)* noalias{{[^,]+}}, i32 addrspace(1)* noalias{{[^,]+}}, [10 x [10 x i32]] addrspace(1)* noalias{{[^)]+}}) +// CHECK: addrspacecast [10 x [10 x [10 x i32]]] addrspace(1)* %{{.+}} to [10 x [10 x [10 x i32]]]* +// CHECK: addrspacecast i32 addrspace(1)* %{{.+}} to i32* +// CHECK: addrspacecast [10 x [10 x i32]] addrspace(1)* %{{.+}} to [10 x [10 x i32]]* + -- 2.40.0
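For reference, a minimal source-level sketch of the pattern this patch exercises (illustrative only, not part of the patch; the build invocation is an assumption about a typical NVPTX offloading setup): an array captured via map() reaches the device-side outlined function as a global (addrspace(1)) pointer that is addrspacecast to the generic address space before use, while a firstprivate scalar is passed by value and needs no address-space translation, matching the CHECK lines in target_parallel_debug_codegen.cpp above.

// Illustrative sketch; assumed build line:
//   clang++ -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -g example.cpp
int main() {
  int a = 0;
  int c[10][10][10];
  // 'c' is mapped: on the NVPTX device the outlined function receives it as a
  // [10 x [10 x [10 x i32]]] addrspace(1)* and casts it to generic before use.
  // 'a' is firstprivate: it is passed by value, so no translation is needed.
#pragma omp target parallel firstprivate(a) map(tofrom : c)
  {
    c[0][0][a] = a;
  }
  return c[0][0][0];
}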