From: Jan Vesely Date: Mon, 4 Dec 2017 22:57:29 +0000 (+0000) Subject: AMDGPU: Disable fp64 support on pre GCN asics X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=85c02734d1e7accd565651047337b405ec12311c;p=llvm AMDGPU: Disable fp64 support on pre GCN asics It's not implemented. Passing +fp64-fp16-denormal feature enables fp64 even on asics that don't support it v2: fix hasFP64 query Differential Revision: https://reviews.llvm.org/D39931 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319709 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 8e5a432e068..22d72462837 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -48,14 +48,27 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, // for SI has the unhelpful behavior that it unsets everything else if you // disable it. - SmallString<256> FullFS("+promote-alloca,+fp64-fp16-denormals,+dx10-clamp,+load-store-opt,"); + SmallString<256> FullFS("+promote-alloca,+dx10-clamp,+load-store-opt,"); + if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. FullFS += "+flat-address-space,+flat-for-global,+unaligned-buffer-access,+trap-handler,"; + // FIXME: I don't think think Evergreen has any useful support for + // denormals, but should be checked. Should we issue a warning somewhere + // if someone tries to enable these? + if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { + FullFS += "+fp64-fp16-denormals,"; + } else { + FullFS += "-fp32-denormals,"; + } + FullFS += FS; ParseSubtargetFeatures(GPU, FullFS); + // We don't support FP64 for EG/NI atm. + assert(!hasFP64() || (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)); + // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es // on VI and newer hardware to avoid assertion failures due to missing ADDR64 // variants of MUBUF instructions. @@ -63,14 +76,6 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, FlatForGlobal = true; } - // FIXME: I don't think think Evergreen has any useful support for - // denormals, but should be checked. Should we issue a warning somewhere - // if someone tries to enable these? - if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { - FP64FP16Denormals = false; - FP32Denormals = false; - } - // Set defaults if needed. if (MaxPrivateElementSize == 0) MaxPrivateElementSize = 4; diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index c1518dd3d4c..99d0e191dd7 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -261,7 +261,7 @@ public: return HasVOP3PInsts; } - bool hasHWFP64() const { + bool hasFP64() const { return FP64; } diff --git a/lib/Target/AMDGPU/R600Processors.td b/lib/Target/AMDGPU/R600Processors.td index 8ef1fe191c3..aaca7a1b183 100644 --- a/lib/Target/AMDGPU/R600Processors.td +++ b/lib/Target/AMDGPU/R600Processors.td @@ -24,7 +24,7 @@ def : Processor<"rs880", R600_VLIW5_Itin, >; def : Processor<"rv670", R600_VLIW5_Itin, - [FeatureR600, FeatureWavefrontSize64, FeatureVertexCache, FeatureFP64] + [FeatureR600, FeatureWavefrontSize64, FeatureVertexCache] >; //===----------------------------------------------------------------------===// @@ -40,7 +40,7 @@ def : Processor<"rv730", R600_VLIW5_Itin, >; def : Processor<"rv770", R600_VLIW5_Itin, - [FeatureR700, FeatureWavefrontSize64, FeatureVertexCache, FeatureFP64] + [FeatureR700, FeatureWavefrontSize64, FeatureVertexCache] >; //===----------------------------------------------------------------------===// @@ -53,7 +53,7 @@ def : Processor<"cedar", R600_VLIW5_Itin, >; def : Processor<"cypress", R600_VLIW5_Itin, - [FeatureEvergreen, FeatureWavefrontSize64, FeatureVertexCache, FeatureFP64] + [FeatureEvergreen, FeatureWavefrontSize64, FeatureVertexCache] >; def : Processor<"juniper", R600_VLIW5_Itin, @@ -82,7 +82,7 @@ def : Processor<"caicos", R600_VLIW5_Itin, >; def : Processor<"cayman", R600_VLIW4_Itin, - [FeatureNorthernIslands, FeatureFP64, FeatureCaymanISA] + [FeatureNorthernIslands, FeatureCaymanISA] >; def : Processor<"turks", R600_VLIW5_Itin, diff --git a/test/CodeGen/AMDGPU/fma.ll b/test/CodeGen/AMDGPU/fma.ll index 952bd1f2954..067ede7d4bd 100644 --- a/test/CodeGen/AMDGPU/fma.ll +++ b/test/CodeGen/AMDGPU/fma.ll @@ -1,5 +1,5 @@ ; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; XUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s declare float @llvm.fma.f32(float, float, float) nounwind readnone declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) nounwind readnone