From: Alexey Bataev Date: Thu, 10 Oct 2019 17:28:10 +0000 (+0000) Subject: [OPENMP50]Support for declare variant directive for NVPTX target. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=b04fa26eaf685c7be5e8bec4fd29dee02b85ea02;p=clang [OPENMP50]Support for declare variant directive for NVPTX target. NVPTX does not support global aliases. Instead, we have to copy the full body of the variant function for the original function. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@374387 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp index 2ad6d01fda..7626f7a43c 100644 --- a/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/lib/CodeGen/CGOpenMPRuntime.cpp @@ -1264,9 +1264,10 @@ CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator, loadOffloadInfoMetadata(); } -static bool tryEmitAlias(CodeGenModule &CGM, const GlobalDecl &NewGD, - const GlobalDecl &OldGD, llvm::GlobalValue *OrigAddr, - bool IsForDefinition) { +bool CGOpenMPRuntime::tryEmitDeclareVariant(const GlobalDecl &NewGD, + const GlobalDecl &OldGD, + llvm::GlobalValue *OrigAddr, + bool IsForDefinition) { // Emit at least a definition for the aliasee if the the address of the // original function is requested. if (IsForDefinition || OrigAddr) @@ -1327,8 +1328,8 @@ void CGOpenMPRuntime::clear() { StringRef MangledName = CGM.getMangledName(Pair.second.second); llvm::GlobalValue *Addr = CGM.GetGlobalValue(MangledName); // If not able to emit alias, just emit original declaration. - (void)tryEmitAlias(CGM, Pair.second.first, Pair.second.second, Addr, - /*IsForDefinition=*/false); + (void)tryEmitDeclareVariant(Pair.second.first, Pair.second.second, Addr, + /*IsForDefinition=*/false); } } @@ -11273,7 +11274,7 @@ bool CGOpenMPRuntime::emitDeclareVariant(GlobalDecl GD, bool IsForDefinition) { if (NewFD == D) return false; GlobalDecl NewGD = GD.getWithDecl(NewFD); - if (tryEmitAlias(CGM, NewGD, GD, Orig, IsForDefinition)) { + if (tryEmitDeclareVariant(NewGD, GD, Orig, IsForDefinition)) { DeferredVariantFunction.erase(D); return true; } diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h index b8137a20d0..9215bd666c 100644 --- a/lib/CodeGen/CGOpenMPRuntime.h +++ b/lib/CodeGen/CGOpenMPRuntime.h @@ -291,6 +291,17 @@ protected: /// default location. virtual unsigned getDefaultLocationReserved2Flags() const { return 0; } + /// Tries to emit declare variant function for \p OldGD from \p NewGD. + /// \param OrigAddr LLVM IR value for \p OldGD. + /// \param IsForDefinition true, if requested emission for the definition of + /// \p OldGD. + /// \returns true, was able to emit a definition function for \p OldGD, which + /// points to \p NewGD. + virtual bool tryEmitDeclareVariant(const GlobalDecl &NewGD, + const GlobalDecl &OldGD, + llvm::GlobalValue *OrigAddr, + bool IsForDefinition); + /// Returns default flags for the barriers depending on the directive, for /// which this barier is going to be emitted. static unsigned getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind); diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index 9e70a5a9bc..83f74fef3b 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -1895,6 +1895,19 @@ unsigned CGOpenMPRuntimeNVPTX::getDefaultLocationReserved2Flags() const { llvm_unreachable("Unknown flags are requested."); } +bool CGOpenMPRuntimeNVPTX::tryEmitDeclareVariant(const GlobalDecl &NewGD, + const GlobalDecl &OldGD, + llvm::GlobalValue *OrigAddr, + bool IsForDefinition) { + // Emit the function in OldGD with the body from NewGD, if NewGD is defined. + auto *NewFD = cast(NewGD.getDecl()); + if (NewFD->isDefined()) { + CGM.emitOpenMPDeviceFunctionRedefinition(OldGD, NewGD, OrigAddr); + return true; + } + return false; +} + CGOpenMPRuntimeNVPTX::CGOpenMPRuntimeNVPTX(CodeGenModule &CGM) : CGOpenMPRuntime(CGM, "_", "$") { if (!CGM.getLangOpts().OpenMPIsDevice) diff --git a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h index e7fd458e72..0f78627c95 100644 --- a/lib/CodeGen/CGOpenMPRuntimeNVPTX.h +++ b/lib/CodeGen/CGOpenMPRuntimeNVPTX.h @@ -193,6 +193,18 @@ protected: /// Full/Lightweight runtime mode. Used for better optimization. unsigned getDefaultLocationReserved2Flags() const override; + /// Tries to emit declare variant function for \p OldGD from \p NewGD. + /// \param OrigAddr LLVM IR value for \p OldGD. + /// \param IsForDefinition true, if requested emission for the definition of + /// \p OldGD. + /// \returns true, was able to emit a definition function for \p OldGD, which + /// points to \p NewGD. + /// NVPTX backend does not support global aliases, so just use the function, + /// emitted for \p NewGD instead of \p OldGD. + bool tryEmitDeclareVariant(const GlobalDecl &NewGD, const GlobalDecl &OldGD, + llvm::GlobalValue *OrigAddr, + bool IsForDefinition) override; + public: explicit CGOpenMPRuntimeNVPTX(CodeGenModule &CGM); void clear() override; diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp index 080914afcc..eab48ccb9b 100644 --- a/lib/CodeGen/CodeGenModule.cpp +++ b/lib/CodeGen/CodeGenModule.cpp @@ -2742,6 +2742,50 @@ void CodeGenModule::EmitMultiVersionFunctionDefinition(GlobalDecl GD, EmitGlobalFunctionDefinition(GD, GV); } +void CodeGenModule::emitOpenMPDeviceFunctionRedefinition( + GlobalDecl OldGD, GlobalDecl NewGD, llvm::GlobalValue *GV) { + assert(getLangOpts().OpenMP && getLangOpts().OpenMPIsDevice && + OpenMPRuntime && "Expected OpenMP device mode."); + const auto *D = cast(OldGD.getDecl()); + + // Compute the function info and LLVM type. + const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(OldGD); + llvm::FunctionType *Ty = getTypes().GetFunctionType(FI); + + // Get or create the prototype for the function. + if (!GV || (GV->getType()->getElementType() != Ty)) { + GV = cast(GetOrCreateLLVMFunction( + getMangledName(OldGD), Ty, GlobalDecl(), /*ForVTable=*/false, + /*DontDefer=*/true, /*IsThunk=*/false, llvm::AttributeList(), + ForDefinition)); + SetFunctionAttributes(OldGD, cast(GV), + /*IsIncompleteFunction=*/false, + /*IsThunk=*/false); + } + // We need to set linkage and visibility on the function before + // generating code for it because various parts of IR generation + // want to propagate this information down (e.g. to local static + // declarations). + auto *Fn = cast(GV); + setFunctionLinkage(OldGD, Fn); + + // FIXME: this is redundant with part of + // setFunctionDefinitionAttributes + setGVProperties(Fn, OldGD); + + MaybeHandleStaticInExternC(D, Fn); + + maybeSetTrivialComdat(*D, *Fn); + + CodeGenFunction(*this).GenerateCode(NewGD, Fn, FI); + + setNonAliasAttributes(OldGD, Fn); + SetLLVMFunctionAttributesForDefinition(D, Fn); + + if (D->hasAttr()) + AddGlobalAnnotations(D, Fn); +} + void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) { const auto *D = cast(GD.getDecl()); diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h index 95964afed4..597b8d712c 100644 --- a/lib/CodeGen/CodeGenModule.h +++ b/lib/CodeGen/CodeGenModule.h @@ -1270,6 +1270,11 @@ public: /// \param D Requires declaration void EmitOMPRequiresDecl(const OMPRequiresDecl *D); + /// Emits the definition of \p OldGD function with body from \p NewGD. + /// Required for proper handling of declare variant directive on the GPU. + void emitOpenMPDeviceFunctionRedefinition(GlobalDecl OldGD, GlobalDecl NewGD, + llvm::GlobalValue *GV); + /// Returns whether the given record has hidden LTO visibility and therefore /// may participate in (single-module) CFI and whole-program vtable /// optimization. diff --git a/test/OpenMP/nvptx_declare_variant_implementation_vendor_codegen.cpp b/test/OpenMP/nvptx_declare_variant_implementation_vendor_codegen.cpp new file mode 100644 index 0000000000..04870f0845 --- /dev/null +++ b/test/OpenMP/nvptx_declare_variant_implementation_vendor_codegen.cpp @@ -0,0 +1,158 @@ +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -fopenmp-version=50 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -fopenmp-version=50 | FileCheck %s --implicit-check-not='ret i32 {{1|81|84}}' +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -emit-pch -o %t -fopenmp-version=50 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -include-pch %t -o - -fopenmp-version=50 | FileCheck %s --implicit-check-not='ret i32 {{1|81|84}}' +// expected-no-diagnostics + +// CHECK-NOT: ret i32 {{1|81|84}} +// CHECK-DAG: define {{.*}}i32 @_Z3barv() +// CHECK-DAG: define {{.*}}i32 @_ZN16SpecSpecialFuncs6MethodEv(%struct.SpecSpecialFuncs* %{{.+}}) +// CHECK-DAG: define {{.*}}i32 @_ZN12SpecialFuncs6MethodEv(%struct.SpecialFuncs* %{{.+}}) +// CHECK-DAG: define linkonce_odr {{.*}}i32 @_ZN16SpecSpecialFuncs6methodEv(%struct.SpecSpecialFuncs* %{{.+}}) +// CHECK-DAG: define linkonce_odr {{.*}}i32 @_ZN12SpecialFuncs6methodEv(%struct.SpecialFuncs* %{{.+}}) +// CHECK-DAG: define {{.*}}i32 @_Z5prio_v() +// CHECK-DAG: define internal i32 @_ZL6prio1_v() +// CHECK-DAG: define {{.*}}i32 @_Z4callv() +// CHECK-DAG: define internal i32 @_ZL9stat_usedv() +// CHECK-DAG: define {{.*}}i32 @fn_linkage() +// CHECK-DAG: define {{.*}}i32 @_Z11fn_linkage1v() + +// CHECK-DAG: ret i32 2 +// CHECK-DAG: ret i32 3 +// CHECK-DAG: ret i32 4 +// CHECK-DAG: ret i32 5 +// CHECK-DAG: ret i32 6 +// CHECK-DAG: ret i32 7 +// CHECK-DAG: ret i32 82 +// CHECK-DAG: ret i32 83 +// CHECK-DAG: ret i32 85 +// CHECK-DAG: ret i32 86 +// CHECK-DAG: ret i32 87 + +// Outputs for function members +// CHECK-DAG: ret i32 6 +// CHECK-DAG: ret i32 7 +// CHECK-NOT: ret i32 {{1|81|84}} + +#ifndef HEADER +#define HEADER + +int foo() { return 2; } +int bazzz(); +int test(); +static int stat_unused_(); +static int stat_used_(); + +#pragma omp declare target + +#pragma omp declare variant(foo) match(implementation = {vendor(llvm)}) +int bar() { return 1; } + +#pragma omp declare variant(bazzz) match(implementation = {vendor(llvm)}) +int baz() { return 1; } + +#pragma omp declare variant(test) match(implementation = {vendor(llvm)}) +int call() { return 1; } + +#pragma omp declare variant(stat_unused_) match(implementation = {vendor(llvm)}) +static int stat_unused() { return 1; } + +#pragma omp declare variant(stat_used_) match(implementation = {vendor(llvm)}) +static int stat_used() { return 1; } + +#pragma omp end declare target + +int main() { + int res; +#pragma omp target map(from \ + : res) + res = bar() + baz() + call(); + return res; +} + +int test() { return 3; } +static int stat_unused_() { return 4; } +static int stat_used_() { return 5; } + +#pragma omp declare target + +struct SpecialFuncs { + void vd() {} + SpecialFuncs(); + ~SpecialFuncs(); + + int method_() { return 6; } +#pragma omp declare variant(SpecialFuncs::method_) \ + match(implementation = {vendor(llvm)}) + int method() { return 1; } +#pragma omp declare variant(SpecialFuncs::method_) \ + match(implementation = {vendor(llvm)}) + int Method(); +} s; + +int SpecialFuncs::Method() { return 1; } + +struct SpecSpecialFuncs { + void vd() {} + SpecSpecialFuncs(); + ~SpecSpecialFuncs(); + + int method_(); +#pragma omp declare variant(SpecSpecialFuncs::method_) \ + match(implementation = {vendor(llvm)}) + int method() { return 1; } +#pragma omp declare variant(SpecSpecialFuncs::method_) \ + match(implementation = {vendor(llvm)}) + int Method(); +} s1; + +#pragma omp end declare target + +int SpecSpecialFuncs::method_() { return 7; } +int SpecSpecialFuncs::Method() { return 1; } + +int prio() { return 81; } +int prio1() { return 82; } +static int prio2() { return 83; } +static int prio3() { return 84; } +static int prio4() { return 84; } +int fn_linkage_variant() { return 85; } +extern "C" int fn_linkage_variant1() { return 86; } +int fn_variant2() { return 1; } + +#pragma omp declare target + +void xxx() { + (void)s.method(); + (void)s1.method(); +} + +#pragma omp declare variant(prio) match(implementation = {vendor(llvm)}) +#pragma omp declare variant(prio1) match(implementation = {vendor(score(1) \ + : llvm)}) +int prio_() { return 1; } + +#pragma omp declare variant(prio4) match(implementation = {vendor(score(3) \ + : llvm)}) +#pragma omp declare variant(prio2) match(implementation = {vendor(score(5) \ + : llvm)}) +#pragma omp declare variant(prio3) match(implementation = {vendor(score(1) \ + : llvm)}) +static int prio1_() { return 1; } + +int int_fn() { return prio1_(); } + +extern "C" { +#pragma omp declare variant(fn_linkage_variant) match(implementation = {vendor(llvm)}) +int fn_linkage() { return 1; } +} + +#pragma omp declare variant(fn_linkage_variant1) match(implementation = {vendor(llvm)}) +int fn_linkage1() { return 1; } + +#pragma omp declare variant(fn_variant2) match(implementation = {vendor(llvm, ibm)}) +int fn2() { return 87; } + +#pragma omp end declare target + +#endif // HEADER