From cef595ebd3a2bee63de273139facf449fc4e19b0 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Fri, 17 Apr 2015 16:59:29 +0000 Subject: [PATCH] Merging r227822: ------------------------------------------------------------------------ r227822 | thomas.stellard | 2015-02-02 13:02:28 -0500 (Mon, 02 Feb 2015) | 6 lines R600/SI: 64-bit and larger memory access must be at least 4-byte aligned This is true for SI only. CI+ supports unaligned memory accesses, but this requires driver support, so for now we disallow unaligned accesses for all GCN targets. ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_36@235206 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/R600/SIISelLowering.cpp | 8 +-- test/CodeGen/R600/cvt_f32_ubyte.ll | 2 +- test/CodeGen/R600/unaligned-load-store.ll | 79 +++++++++++++++++++++-- 3 files changed, 80 insertions(+), 9 deletions(-) diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index 6b2ea0682a4..52592245bf4 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -314,9 +314,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT, if (!VT.isSimple() || VT == MVT::Other) return false; - // XXX - CI changes say "Support for unaligned memory accesses" but I don't - // see what for specifically. The wording everywhere else seems to be the - // same. + // TODO - CI+ supports unaligned memory accesses, but this requires driver + // support. // XXX - The only mention I see of this in the ISA manual is for LDS direct // reads the "byte address and must be dword aligned". Is it also true for the @@ -333,7 +332,8 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT, // This applies to private, global, and constant memory. 
if (IsFast) *IsFast = true; - return VT.bitsGT(MVT::i32); + + return VT.bitsGT(MVT::i32) && Align % 4 == 0; } EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, diff --git a/test/CodeGen/R600/cvt_f32_ubyte.ll b/test/CodeGen/R600/cvt_f32_ubyte.ll index afcc95e465e..69eea5919c0 100644 --- a/test/CodeGen/R600/cvt_f32_ubyte.ll +++ b/test/CodeGen/R600/cvt_f32_ubyte.ll @@ -146,7 +146,7 @@ define void @load_v7i8_to_v7f32(<7 x float> addrspace(1)* noalias %out, <7 x i8> ; SI: buffer_store_dword ; SI: buffer_store_dword define void @load_v8i8_to_v8f32(<8 x float> addrspace(1)* noalias %out, <8 x i8> addrspace(1)* noalias %in) nounwind { - %load = load <8 x i8> addrspace(1)* %in, align 1 + %load = load <8 x i8> addrspace(1)* %in, align 8 %cvt = uitofp <8 x i8> %load to <8 x float> store <8 x float> %cvt, <8 x float> addrspace(1)* %out, align 16 ret void diff --git a/test/CodeGen/R600/unaligned-load-store.ll b/test/CodeGen/R600/unaligned-load-store.ll index 2c4e02f912d..3c9a2c120ce 100644 --- a/test/CodeGen/R600/unaligned-load-store.ll +++ b/test/CodeGen/R600/unaligned-load-store.ll @@ -1,18 +1,65 @@ ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s -; SI-LABEL: {{^}}unaligned_load_store_i32: +; SI-LABEL: {{^}}unaligned_load_store_i32_local: +; SI: ds_read_u8 +; SI: ds_read_u8 ; SI: ds_read_u8 ; SI: ds_read_u8 ; SI: ds_write_b32 ; SI: s_endpgm -define void @unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind { +define void @unaligned_load_store_i32_local(i32 addrspace(3)* %p, i32 addrspace(3)* %r) nounwind { %v = load i32 addrspace(3)* %p, align 1 store i32 %v, i32 addrspace(3)* %r, align 1 ret void } -; SI-LABEL: {{^}}unaligned_load_store_v4i32: +; SI-LABEL: {{^}}unaligned_load_store_i32_global: +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: 
buffer_load_ubyte +; SI: buffer_store_dword +define void @unaligned_load_store_i32_global(i32 addrspace(1)* %p, i32 addrspace(1)* %r) nounwind { + %v = load i32 addrspace(1)* %p, align 1 + store i32 %v, i32 addrspace(1)* %r, align 1 + ret void +} + +; SI-LABEL: {{^}}unaligned_load_store_i64_local: +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_read_u8 +; SI: ds_write2_b32 +; SI: s_endpgm +define void @unaligned_load_store_i64_local(i64 addrspace(3)* %p, i64 addrspace(3)* %r) { + %v = load i64 addrspace(3)* %p, align 1 + store i64 %v, i64 addrspace(3)* %r, align 1 + ret void +} + +; SI-LABEL: {{^}}unaligned_load_store_i64_global: +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: buffer_load_ubyte +; SI: buffer_store_dwordx2 +define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) { + %v = load i64 addrspace(1)* %p, align 1 + store i64 %v, i64 addrspace(1)* %r, align 1 + ret void +} + +; SI-LABEL: {{^}}unaligned_load_store_v4i32_local: ; SI: ds_read_u8 ; SI: ds_read_u8 ; SI: ds_read_u8 @@ -38,12 +85,36 @@ define void @unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r ; SI: ds_write_b32 ; SI: ds_write_b32 ; SI: s_endpgm -define void @unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind { +define void @unaligned_load_store_v4i32_local(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) nounwind { %v = load <4 x i32> addrspace(3)* %p, align 1 store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1 ret void } +; FIXME: We mark v4i32 as custom, so misaligned loads are never expanded. 
+; FIXME-SI-LABEL: {{^}}unaligned_load_store_v4i32_global: +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +; FIXME-SI: buffer_load_ubyte +define void @unaligned_load_store_v4i32_global(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) nounwind { + %v = load <4 x i32> addrspace(1)* %p, align 1 + store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 1 + ret void +} + ; SI-LABEL: {{^}}load_lds_i64_align_4: ; SI: ds_read2_b32 ; SI: s_endpgm -- 2.50.1