}
void CodeGenModule::EmitOMPRequiresDecl(const OMPRequiresDecl *D) {
- //Do nothing - here to avoid build errors
+ getOpenMPRuntime().checkArchForUnifiedAddressing(*this, D);
}
virtual void
adjustTargetSpecificDataForLambdas(CodeGenFunction &CGF,
const OMPExecutableDirective &D) const;
+
+ /// Perform check on requires decl to ensure that target architecture
+ /// supports unified addressing
+ virtual void checkArchForUnifiedAddressing(CodeGenModule &CGM,
+ const OMPRequiresDecl *D) const {}
};
/// Class supports emissionof SIMD-only code.
}
}
+// Get current CudaArch and ignore any unknown values
+static CudaArch getCudaArch(CodeGenModule &CGM) {
+ if (!CGM.getTarget().hasFeature("ptx"))
+ return CudaArch::UNKNOWN;
+ llvm::StringMap<bool> Features;
+ CGM.getTarget().initFeatureMap(Features, CGM.getDiags(),
+ CGM.getTarget().getTargetOpts().CPU,
+ CGM.getTarget().getTargetOpts().Features);
+ for (const auto &Feature : Features) {
+ if (Feature.getValue()) {
+ CudaArch Arch = StringToCudaArch(Feature.getKey());
+ if (Arch != CudaArch::UNKNOWN)
+ return Arch;
+ }
+ }
+ return CudaArch::UNKNOWN;
+}
+
+/// Check to see if target architecture supports unified addressing which is
+/// a restriction for OpenMP requires clause "unified_shared_memory".
+void CGOpenMPRuntimeNVPTX::checkArchForUnifiedAddressing(
+ CodeGenModule &CGM, const OMPRequiresDecl *D) const {
+ for (const OMPClause *Clause : D->clauselists()) {
+ if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
+ switch (getCudaArch(CGM)) {
+ case CudaArch::SM_20:
+ case CudaArch::SM_21:
+ case CudaArch::SM_30:
+ case CudaArch::SM_32:
+ case CudaArch::SM_35:
+ case CudaArch::SM_37:
+ case CudaArch::SM_50:
+ case CudaArch::SM_52:
+ case CudaArch::SM_53:
+ case CudaArch::SM_60:
+ case CudaArch::SM_61:
+ case CudaArch::SM_62:
+ CGM.Error(Clause->getBeginLoc(),
+ "Target architecture does not support unified addressing");
+ return;
+ case CudaArch::SM_70:
+ case CudaArch::SM_72:
+ case CudaArch::SM_75:
+ case CudaArch::GFX600:
+ case CudaArch::GFX601:
+ case CudaArch::GFX700:
+ case CudaArch::GFX701:
+ case CudaArch::GFX702:
+ case CudaArch::GFX703:
+ case CudaArch::GFX704:
+ case CudaArch::GFX801:
+ case CudaArch::GFX802:
+ case CudaArch::GFX803:
+ case CudaArch::GFX810:
+ case CudaArch::GFX900:
+ case CudaArch::GFX902:
+ case CudaArch::GFX904:
+ case CudaArch::GFX906:
+ case CudaArch::GFX909:
+ case CudaArch::UNKNOWN:
+ break;
+ case CudaArch::LAST:
+ llvm_unreachable("Unexpected Cuda arch.");
+ }
+ }
+ }
+}
+
/// Get number of SMs and number of blocks per SM.
static std::pair<unsigned, unsigned> getSMsBlocksPerSM(CodeGenModule &CGM) {
std::pair<unsigned, unsigned> Data;
Data.second = CGM.getLangOpts().OpenMPCUDABlocksPerSM;
if (Data.first && Data.second)
return Data;
- if (CGM.getTarget().hasFeature("ptx")) {
- llvm::StringMap<bool> Features;
- CGM.getTarget().initFeatureMap(Features, CGM.getDiags(),
- CGM.getTarget().getTargetOpts().CPU,
- CGM.getTarget().getTargetOpts().Features);
- for (const auto &Feature : Features) {
- if (Feature.getValue()) {
- switch (StringToCudaArch(Feature.getKey())) {
- case CudaArch::SM_20:
- case CudaArch::SM_21:
- case CudaArch::SM_30:
- case CudaArch::SM_32:
- case CudaArch::SM_35:
- case CudaArch::SM_37:
- case CudaArch::SM_50:
- case CudaArch::SM_52:
- case CudaArch::SM_53:
- return {16, 16};
- case CudaArch::SM_60:
- case CudaArch::SM_61:
- case CudaArch::SM_62:
- return {56, 32};
- case CudaArch::SM_70:
- case CudaArch::SM_72:
- case CudaArch::SM_75:
- return {84, 32};
- case CudaArch::GFX600:
- case CudaArch::GFX601:
- case CudaArch::GFX700:
- case CudaArch::GFX701:
- case CudaArch::GFX702:
- case CudaArch::GFX703:
- case CudaArch::GFX704:
- case CudaArch::GFX801:
- case CudaArch::GFX802:
- case CudaArch::GFX803:
- case CudaArch::GFX810:
- case CudaArch::GFX900:
- case CudaArch::GFX902:
- case CudaArch::GFX904:
- case CudaArch::GFX906:
- case CudaArch::GFX909:
- case CudaArch::UNKNOWN:
- break;
- case CudaArch::LAST:
- llvm_unreachable("Unexpected Cuda arch.");
- }
- }
- }
+ switch (getCudaArch(CGM)) {
+ case CudaArch::SM_20:
+ case CudaArch::SM_21:
+ case CudaArch::SM_30:
+ case CudaArch::SM_32:
+ case CudaArch::SM_35:
+ case CudaArch::SM_37:
+ case CudaArch::SM_50:
+ case CudaArch::SM_52:
+ case CudaArch::SM_53:
+ return {16, 16};
+ case CudaArch::SM_60:
+ case CudaArch::SM_61:
+ case CudaArch::SM_62:
+ return {56, 32};
+ case CudaArch::SM_70:
+ case CudaArch::SM_72:
+ case CudaArch::SM_75:
+ return {84, 32};
+ case CudaArch::GFX600:
+ case CudaArch::GFX601:
+ case CudaArch::GFX700:
+ case CudaArch::GFX701:
+ case CudaArch::GFX702:
+ case CudaArch::GFX703:
+ case CudaArch::GFX704:
+ case CudaArch::GFX801:
+ case CudaArch::GFX802:
+ case CudaArch::GFX803:
+ case CudaArch::GFX810:
+ case CudaArch::GFX900:
+ case CudaArch::GFX902:
+ case CudaArch::GFX904:
+ case CudaArch::GFX906:
+ case CudaArch::GFX909:
+ case CudaArch::UNKNOWN:
+ break;
+ case CudaArch::LAST:
+ llvm_unreachable("Unexpected Cuda arch.");
}
llvm_unreachable("Unexpected NVPTX target without ptx feature.");
}
void adjustTargetSpecificDataForLambdas(
CodeGenFunction &CGF, const OMPExecutableDirective &D) const override;
+ /// Perform check on requires decl to ensure that target architecture
+ /// supports unified addressing
+ void checkArchForUnifiedAddressing(CodeGenModule &CGM,
+ const OMPRequiresDecl *D) const override;
+
private:
/// Track the execution mode when codegening directives within a target
/// region. The appropriate mode (SPMD/NON-SPMD) is set on entry to the
--- /dev/null
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -DREGION_HOST
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_20 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_21 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_30 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_32 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_35 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_37 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_50 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_52 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_53 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_60 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_61 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_62 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_70 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_NO_ERR
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_72 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_NO_ERR
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-nvidia-cuda -fopenmp-targets=nvptx64-nvidia-cuda -target-cpu sm_75 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t-out.ll -DREGION_DEVICE_NO_ERR
+
+#if defined(REGION_HOST) || defined(REGION_DEVICE_NO_ERR)
+// expected-no-diagnostics
+#pragma omp requires unified_shared_memory
+#endif
+
+#ifdef REGION_DEVICE
+#pragma omp requires unified_shared_memory // expected-error {{Target architecture does not support unified addressing}}
+#endif