From: Matt Arsenault Date: Fri, 27 Jan 2017 20:21:31 +0000 (+0000) Subject: Merging r293310: X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=974695d103f9d6957744e57ddadf1f2a42b1ad18;p=llvm Merging r293310: ------------------------------------------------------------------------ r293310 | arsenm | 2017-01-27 09:42:26 -0800 (Fri, 27 Jan 2017) | 8 lines AMDGPU: Enable FeatureFlatForGlobal on Volcanic Islands Accomplishes what r292982 was supposed to, which ended up only really making the necessary test changes. This should be applied to the 4.0 branch. Patch by Vedran Miletić ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_40@293329 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index c5d7d85d405..13022009af1 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -282,12 +282,6 @@ def FeatureEnableSIScheduler : SubtargetFeature<"si-scheduler", "Enable SI Machine Scheduler" >; -def FeatureNoAddr64 : SubtargetFeature<"mubuf-no-addr64", - "NoAddr64", - "true", - "MUBUF instructions have addr64 bit" ->; - // Unless +-flat-for-global is specified, turn on FlatForGlobal for // all OS-es on VI and newer hardware to avoid assertion failures due // to missing ADDR64 variants of MUBUF instructions. @@ -297,8 +291,7 @@ def FeatureNoAddr64 : SubtargetFeature<"mubuf-no-addr64", def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global", "FlatForGlobal", "true", - "Force to generate flat instruction for global", - [FeatureNoAddr64] + "Force to generate flat instruction for global" >; // Dummy feature used to disable assembler instructions. @@ -350,8 +343,7 @@ def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS", FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN, FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts, FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel, - FeatureScalarStores, FeatureInv2PiInlineImm, - FeatureNoAddr64 + FeatureScalarStores, FeatureInv2PiInlineImm ] >; diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index e2ec839a953..c35a67de1d7 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -48,6 +48,13 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT, ParseSubtargetFeatures(GPU, FullFS); + // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es + // on VI and newer hardware to avoid assertion failures due to missing ADDR64 + // variants of MUBUF instructions. + if (!hasAddr64() && !FS.contains("flat-for-global")) { + FlatForGlobal = true; + } + // FIXME: I don't think think Evergreen has any useful support for // denormals, but should be checked. Should we issue a warning somewhere // if someone tries to enable these? @@ -83,7 +90,6 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS, FP64Denormals(false), FPExceptions(false), FlatForGlobal(false), - NoAddr64(false), UnalignedScratchAccess(false), UnalignedBufferAccess(false), diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 51ca36fb7b3..0e3cb7dc1f8 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -86,7 +86,6 @@ protected: bool FP64Denormals; bool FPExceptions; bool FlatForGlobal; - bool NoAddr64; bool UnalignedScratchAccess; bool UnalignedBufferAccess; bool EnableXNACK; diff --git a/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll b/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll deleted file mode 100644 index 8227d4c873e..00000000000 --- a/test/CodeGen/AMDGPU/ci-use-flat-for-global.ll +++ /dev/null @@ -1,26 +0,0 @@ -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s -; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s -; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s -; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s - - -; There are no stack objects even though flat is used by default, so -; flat_scratch_init should be disabled. - -; ALL-LABEL: {{^}}test: -; HSA: .amd_kernel_code_t -; HSA: enable_sgpr_flat_scratch_init = 0 -; HSA: .end_amd_kernel_code_t - -; ALL-NOT: flat_scr - -; HSA-DEFAULT: flat_store_dword -; HSA-NODEFAULT: buffer_store_dword - -; NOHSA-DEFAULT: buffer_store_dword -; NOHSA-NODEFAULT: flat_store_dword -define void @test(i32 addrspace(1)* %out) { -entry: - store i32 0, i32 addrspace(1)* %out - ret void -} diff --git a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll index fb693de0e39..ad3992f4cd0 100644 --- a/test/CodeGen/AMDGPU/fcanonicalize.f16.ll +++ b/test/CodeGen/AMDGPU/fcanonicalize.f16.ll @@ -167,6 +167,6 @@ define void @test_fold_canonicalize_snan3_value_f16(half addrspace(1)* %out) #1 } attributes #0 = { nounwind readnone } -attributes #1 = { nounwind } -attributes #2 = { nounwind "target-features"="-fp16-denormals,-fp16-denormals" } -attributes #3 = { nounwind "target-features"="+fp16-denormals,+fp64-denormals" } +attributes #1 = { nounwind "target-features"="-flat-for-global" } +attributes #2 = { nounwind "target-features"="-flat-for-global,-fp16-denormals,-fp16-denormals" } +attributes #3 = { nounwind "target-features"="-flat-for-global,+fp16-denormals,+fp64-denormals" } diff --git a/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll b/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll new file mode 100644 index 00000000000..df9ba00c697 --- /dev/null +++ b/test/CodeGen/AMDGPU/flat-for-global-subtarget-feature.ll @@ -0,0 +1,54 @@ +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s +; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=HSA-NOADDR64 -check-prefix=ALL %s +; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s +; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s +; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=NOHSA-NOADDR64 -check-prefix=ALL %s + + +; There are no stack objects even though flat is used by default, so +; flat_scratch_init should be disabled. + +; ALL-LABEL: {{^}}test: +; HSA: .amd_kernel_code_t +; HSA: enable_sgpr_flat_scratch_init = 0 +; HSA: .end_amd_kernel_code_t + +; ALL-NOT: flat_scr + +; HSA-DEFAULT: flat_store_dword +; HSA-NODEFAULT: buffer_store_dword +; HSA-NOADDR64: flat_store_dword + +; NOHSA-DEFAULT: buffer_store_dword +; NOHSA-NODEFAULT: flat_store_dword +; NOHSA-NOADDR64: flat_store_dword +define void @test(i32 addrspace(1)* %out) { +entry: + store i32 0, i32 addrspace(1)* %out + ret void +} + +; HSA-DEFAULT: flat_store_dword +; HSA-NODEFAULT: buffer_store_dword +; HSA-NOADDR64: flat_store_dword + +; NOHSA-DEFAULT: buffer_store_dword +; NOHSA-NODEFAULT: flat_store_dword +; NOHSA-NOADDR64: flat_store_dword +define void @test_addr64(i32 addrspace(1)* %out) { +entry: + %out.addr = alloca i32 addrspace(1)*, align 4 + + store i32 addrspace(1)* %out, i32 addrspace(1)** %out.addr, align 4 + %ld0 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4 + + %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %ld0, i32 0 + store i32 1, i32 addrspace(1)* %arrayidx, align 4 + + %ld1 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4 + %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %ld1, i32 1 + store i32 2, i32 addrspace(1)* %arrayidx1, align 4 + + ret void +} diff --git a/test/CodeGen/AMDGPU/v_madak_f16.ll b/test/CodeGen/AMDGPU/v_madak_f16.ll index 7707d6b1baa..df220d7a977 100644 --- a/test/CodeGen/AMDGPU/v_madak_f16.ll +++ b/test/CodeGen/AMDGPU/v_madak_f16.ll @@ -1,5 +1,5 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s -; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s +; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals,-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s ; GCN-LABEL: {{^}}madak_f16 ; GCN: buffer_load_ushort v[[A_F16:[0-9]+]]