From a3e4b69abb3f7417fab584fb614523275205eca3 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 16 May 2019 14:48:34 +0000 Subject: [PATCH] AMDGPU: Assume xnack is enabled by default This is the conservatively correct default. It is always safe to assume xnack is enabled, but not the converse. Introduce a feature to blacklist targets where xnack can never be meaningfully enabled. I'm not sure the targets this is applied to are 100% correct. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@360903 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/AMDGPU.td | 21 +++++- lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 8 ++- lib/Target/AMDGPU/AMDGPUSubtarget.h | 1 + .../AMDGPU/break-vmem-soft-clauses.mir | 3 + test/CodeGen/AMDGPU/spill-cfg-position.ll | 2 +- .../AMDGPU/inline-target-feature-xnack.ll | 67 +++++++++++++++++++ 6 files changed, 99 insertions(+), 3 deletions(-) create mode 100644 test/Transforms/Inline/AMDGPU/inline-target-feature-xnack.ll diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td index 663569e67ba..341ef73a21c 100644 --- a/lib/Target/AMDGPU/AMDGPU.td +++ b/lib/Target/AMDGPU/AMDGPU.td @@ -108,6 +108,12 @@ def FeatureFmaMixInsts : SubtargetFeature<"fma-mix-insts", "Has v_fma_mix_f32, v_fma_mixlo_f16, v_fma_mixhi_f16 instructions" >; +def FeatureDoesNotSupportXNACK : SubtargetFeature<"no-xnack-support", + "DoesNotSupportXNACK", + "true", + "Hardware does not support XNACK" +>; + // XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support // XNACK. 
The current default kernel driver setting is: // - graphics ring: XNACK disabled @@ -560,7 +566,7 @@ def FeatureSouthernIslands : GCNSubtargetFeatureGeneration<"SOUTHERN_ISLANDS", [FeatureFP64, FeatureLocalMemorySize32768, FeatureMIMG_R128, FeatureWavefrontSize64, FeatureLDSBankCount32, FeatureMovrel, FeatureTrigReducedRange, - FeatureDoesNotSupportSRAMECC] + FeatureDoesNotSupportSRAMECC, FeatureDoesNotSupportXNACK] >; def FeatureSeaIslands : GCNSubtargetFeatureGeneration<"SEA_ISLANDS", @@ -624,16 +630,19 @@ def FeatureISAVersion6_0_0 : FeatureSet<[FeatureSouthernIslands, FeatureFastFMAF32, HalfRate64Ops, FeatureLDSBankCount32, + FeatureDoesNotSupportXNACK, FeatureCodeObjectV3]>; def FeatureISAVersion6_0_1 : FeatureSet< [FeatureSouthernIslands, FeatureLDSBankCount32, + FeatureDoesNotSupportXNACK, FeatureCodeObjectV3]>; def FeatureISAVersion7_0_0 : FeatureSet< [FeatureSeaIslands, FeatureLDSBankCount32, + FeatureDoesNotSupportXNACK, FeatureCodeObjectV3]>; def FeatureISAVersion7_0_1 : FeatureSet< @@ -641,22 +650,26 @@ def FeatureISAVersion7_0_1 : FeatureSet< HalfRate64Ops, FeatureLDSBankCount32, FeatureFastFMAF32, + FeatureDoesNotSupportXNACK, FeatureCodeObjectV3]>; def FeatureISAVersion7_0_2 : FeatureSet< [FeatureSeaIslands, FeatureLDSBankCount16, FeatureFastFMAF32, + FeatureDoesNotSupportXNACK, FeatureCodeObjectV3]>; def FeatureISAVersion7_0_3 : FeatureSet< [FeatureSeaIslands, FeatureLDSBankCount16, + FeatureDoesNotSupportXNACK, FeatureCodeObjectV3]>; def FeatureISAVersion7_0_4 : FeatureSet< [FeatureSeaIslands, FeatureLDSBankCount32, + FeatureDoesNotSupportXNACK, FeatureCodeObjectV3]>; def FeatureISAVersion8_0_1 : FeatureSet< @@ -673,12 +686,14 @@ def FeatureISAVersion8_0_2 : FeatureSet< FeatureLDSBankCount32, FeatureSGPRInitBug, FeatureUnpackedD16VMem, + FeatureDoesNotSupportXNACK, FeatureCodeObjectV3]>; def FeatureISAVersion8_0_3 : FeatureSet< [FeatureVolcanicIslands, FeatureLDSBankCount32, FeatureUnpackedD16VMem, + FeatureDoesNotSupportXNACK, 
FeatureCodeObjectV3]>; def FeatureISAVersion8_1_0 : FeatureSet< @@ -692,6 +707,7 @@ def FeatureISAVersion9_0_0 : FeatureSet< FeatureMadMixInsts, FeatureLDSBankCount32, FeatureCodeObjectV3, + FeatureDoesNotSupportXNACK, FeatureDoesNotSupportSRAMECC]>; def FeatureISAVersion9_0_2 : FeatureSet< @@ -706,6 +722,7 @@ def FeatureISAVersion9_0_4 : FeatureSet< [FeatureGFX9, FeatureLDSBankCount32, FeatureFmaMixInsts, + FeatureDoesNotSupportXNACK, FeatureDoesNotSupportSRAMECC, FeatureCodeObjectV3]>; @@ -717,6 +734,7 @@ def FeatureISAVersion9_0_6 : FeatureSet< FeatureDLInsts, FeatureDot1Insts, FeatureDot2Insts, + FeatureDoesNotSupportXNACK, FeatureCodeObjectV3]>; def FeatureISAVersion9_0_9 : FeatureSet< @@ -752,6 +770,7 @@ def FeatureISAVersion10_1_0 : FeatureSet< FeatureScalarAtomics, FeatureScalarFlatScratchInsts, FeatureLdsMisalignedBug, + FeatureDoesNotSupportXNACK, FeatureCodeObjectV3])>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 6d47631da1e..a88218f68b5 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -78,7 +78,7 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT, // unset everything else if it is disabled // Assuming ECC is enabled is the conservative default. - SmallString<256> FullFS("+promote-alloca,+load-store-opt,+sram-ecc,"); + SmallString<256> FullFS("+promote-alloca,+load-store-opt,+sram-ecc,+xnack,"); if (isAmdHsaOS()) // Turn on FlatForGlobal for HSA. 
FullFS += "+flat-for-global,+unaligned-buffer-access,+trap-handler,"; @@ -130,6 +130,11 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT, HasFminFmaxLegacy = getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS; + if (DoesNotSupportXNACK && EnableXNACK) { + ToggleFeature(AMDGPU::FeatureXNACK); + EnableXNACK = false; + } + // ECC is on by default, but turn it off if the hardware doesn't support it // anyway. This matters for the gfx9 targets with d16 loads, but don't support // ECC. @@ -181,6 +186,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS, HasApertureRegs(false), EnableXNACK(false), + DoesNotSupportXNACK(false), EnableCuMode(false), TrapHandler(false), diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h index 2477ee7ce7d..1ef72622980 100644 --- a/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -294,6 +294,7 @@ protected: bool UnalignedBufferAccess; bool HasApertureRegs; bool EnableXNACK; + bool DoesNotSupportXNACK; bool EnableCuMode; bool TrapHandler; diff --git a/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir b/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir index e785154c243..323396795dd 100644 --- a/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir +++ b/test/CodeGen/AMDGPU/break-vmem-soft-clauses.mir @@ -1,4 +1,7 @@ # RUN: llc -march=amdgcn -mcpu=carrizo -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s + +# Make sure the default assumption is xnack enabled with no cpu +# RUN: llc -march=amdgcn -verify-machineinstrs -mattr=+volcanic-islands -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,XNACK %s # RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,NOXNACK %s --- # Trivial clause at beginning of program diff --git a/test/CodeGen/AMDGPU/spill-cfg-position.ll b/test/CodeGen/AMDGPU/spill-cfg-position.ll 
index cbf9f37e29e..d78c4de6280 100644 --- a/test/CodeGen/AMDGPU/spill-cfg-position.ll +++ b/test/CodeGen/AMDGPU/spill-cfg-position.ll @@ -1,4 +1,4 @@ -; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs -stress-regalloc=6 < %s | FileCheck %s +; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tahiti -verify-machineinstrs -stress-regalloc=6 < %s | FileCheck %s ; Inline spiller can decide to move a spill as early as possible in the basic block. ; It will skip phis and label, but we also need to make sure it skips instructions diff --git a/test/Transforms/Inline/AMDGPU/inline-target-feature-xnack.ll b/test/Transforms/Inline/AMDGPU/inline-target-feature-xnack.ll new file mode 100644 index 00000000000..3b0cfb54cea --- /dev/null +++ b/test/Transforms/Inline/AMDGPU/inline-target-feature-xnack.ll @@ -0,0 +1,67 @@ +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -inline < %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes='cgscc(inline)' < %s | FileCheck %s + +define i32 @func_default() #0 { + ret i32 0 +} + +define i32 @func_xnack_enabled() #1 { + ret i32 0 +} + +define i32 @func_xnack_disabled() #2 { + ret i32 0 +} + +; CHECK-LABEL: @default_call_default( +; CHECK-NEXT: ret i32 0 +define i32 @default_call_default() #0 { + %call = call i32 @func_default() + ret i32 %call +} + +; CHECK-LABEL: @xnack_enabled_call_default( +; CHECK-NEXT: ret i32 0 +define i32 @xnack_enabled_call_default() #1 { + %call = call i32 @func_default() + ret i32 %call +} + +; CHECK-LABEL: @xnack_enabled_call_xnack_enabled( +; CHECK-NEXT: ret i32 0 +define i32 @xnack_enabled_call_xnack_enabled() #1 { + %call = call i32 @func_xnack_enabled() + ret i32 %call +} + +; CHECK-LABEL: @xnack_enabled_call_xnack_disabled( +; CHECK-NEXT: ret i32 0 +define i32 @xnack_enabled_call_xnack_disabled() #1 { + %call = call i32 @func_xnack_disabled() + ret i32 %call +} + +; CHECK-LABEL: @xnack_disabled_call_default( +; CHECK-NEXT: ret i32 0 +define i32 
@xnack_disabled_call_default() #2 { + %call = call i32 @func_default() + ret i32 %call +} + +; CHECK-LABEL: @xnack_disabled_call_xnack_enabled( +; CHECK-NEXT: ret i32 0 +define i32 @xnack_disabled_call_xnack_enabled() #2 { + %call = call i32 @func_xnack_enabled() + ret i32 %call +} + +; CHECK-LABEL: @xnack_disabled_call_xnack_disabled( +; CHECK-NEXT: ret i32 0 +define i32 @xnack_disabled_call_xnack_disabled() #2 { + %call = call i32 @func_xnack_disabled() + ret i32 %call +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind "target-features"="+xnack" } +attributes #2 = { nounwind "target-features"="-xnack" } -- 2.50.1