"Enable SI Machine Scheduler"
>;
-def FeatureNoAddr64 : SubtargetFeature<"mubuf-no-addr64",
- "NoAddr64",
- "true",
- "MUBUF instructions have addr64 bit"
->;
-
// Unless +-flat-for-global is specified, turn on FlatForGlobal for
// all OS-es on VI and newer hardware to avoid assertion failures due
// to missing ADDR64 variants of MUBUF instructions.
def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
"FlatForGlobal",
"true",
- "Force to generate flat instruction for global",
- [FeatureNoAddr64]
+ "Force to generate flat instruction for global"
>;
// Dummy feature used to disable assembler instructions.
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
FeatureScalarStores, FeatureInv2PiInlineImm, FeatureSDWA,
- FeatureDPP, FeatureNoAddr64
+ FeatureDPP
]
>;
ParseSubtargetFeatures(GPU, FullFS);
+ // Unless +-flat-for-global is specified, turn on FlatForGlobal for all OS-es
+ // on VI and newer hardware to avoid assertion failures due to missing ADDR64
+ // variants of MUBUF instructions.
+ if (!hasAddr64() && !FS.contains("flat-for-global")) {
+ FlatForGlobal = true;
+ }
+
// FIXME: I don't think think Evergreen has any useful support for
// denormals, but should be checked. Should we issue a warning somewhere
// if someone tries to enable these?
FP64FP16Denormals(false),
FPExceptions(false),
FlatForGlobal(false),
- NoAddr64(false),
UnalignedScratchAccess(false),
UnalignedBufferAccess(false),
bool FP64FP16Denormals;
bool FPExceptions;
bool FlatForGlobal;
- bool NoAddr64;
bool UnalignedScratchAccess;
bool UnalignedBufferAccess;
bool EnableXNACK;
+++ /dev/null
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s
-; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s
-; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s
-
-
-; There are no stack objects even though flat is used by default, so
-; flat_scratch_init should be disabled.
-
-; ALL-LABEL: {{^}}test:
-; HSA: .amd_kernel_code_t
-; HSA: enable_sgpr_flat_scratch_init = 0
-; HSA: .end_amd_kernel_code_t
-
-; ALL-NOT: flat_scr
-
-; HSA-DEFAULT: flat_store_dword
-; HSA-NODEFAULT: buffer_store_dword
-
-; NOHSA-DEFAULT: buffer_store_dword
-; NOHSA-NODEFAULT: flat_store_dword
-define void @test(i32 addrspace(1)* %out) {
-entry:
- store i32 0, i32 addrspace(1)* %out
- ret void
-}
--- /dev/null
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-DEFAULT -check-prefix=ALL %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=HSA -check-prefix=HSA-NODEFAULT -check-prefix=ALL %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=HSA-NOADDR64 -check-prefix=ALL %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=-flat-for-global < %s | FileCheck -check-prefix=NOHSA-DEFAULT -check-prefix=ALL %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -mattr=+flat-for-global < %s | FileCheck -check-prefix=NOHSA-NODEFAULT -check-prefix=ALL %s
+; RUN: llc -mtriple=amdgcn-- -mcpu=tonga < %s | FileCheck -check-prefix=NOHSA-NOADDR64 -check-prefix=ALL %s
+
+
+; There are no stack objects even though flat is used by default, so
+; flat_scratch_init should be disabled.
+
+; ALL-LABEL: {{^}}test:
+; HSA: .amd_kernel_code_t
+; HSA: enable_sgpr_flat_scratch_init = 0
+; HSA: .end_amd_kernel_code_t
+
+; ALL-NOT: flat_scr
+
+; HSA-DEFAULT: flat_store_dword
+; HSA-NODEFAULT: buffer_store_dword
+; HSA-NOADDR64: flat_store_dword
+
+; NOHSA-DEFAULT: buffer_store_dword
+; NOHSA-NODEFAULT: flat_store_dword
+; NOHSA-NOADDR64: flat_store_dword
+define void @test(i32 addrspace(1)* %out) {
+entry:
+ store i32 0, i32 addrspace(1)* %out
+ ret void
+}
+
+; HSA-DEFAULT: flat_store_dword
+; HSA-NODEFAULT: buffer_store_dword
+; HSA-NOADDR64: flat_store_dword
+
+; NOHSA-DEFAULT: buffer_store_dword
+; NOHSA-NODEFAULT: flat_store_dword
+; NOHSA-NOADDR64: flat_store_dword
+define void @test_addr64(i32 addrspace(1)* %out) {
+entry:
+ %out.addr = alloca i32 addrspace(1)*, align 4
+
+ store i32 addrspace(1)* %out, i32 addrspace(1)** %out.addr, align 4
+ %ld0 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4
+
+ %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %ld0, i32 0
+ store i32 1, i32 addrspace(1)* %arrayidx, align 4
+
+ %ld1 = load i32 addrspace(1)*, i32 addrspace(1)** %out.addr, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %ld1, i32 1
+ store i32 2, i32 addrspace(1)* %arrayidx1, align 4
+
+ ret void
+}