From: Matt Arsenault Date: Fri, 4 Oct 2019 08:35:38 +0000 (+0000) Subject: AMDGPU/GlobalISel: Fix using wrong addrspace for aperture X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c15d04b5f6f1b727c3cd2921d7980368ebf69e7f;p=llvm AMDGPU/GlobalISel: Fix using wrong addrspace for aperture This was always passing the destination flat address space, when it should be picking between the two valid source options. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@373716 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index cf4275c23e8..f61c9753d2a 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1085,6 +1085,8 @@ Register AMDGPULegalizerInfo::getSegmentAperture( const GCNSubtarget &ST = MF.getSubtarget(); const LLT S32 = LLT::scalar(32); + assert(AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS); + if (ST.hasApertureRegs()) { // FIXME: Use inline constants (src_{shared, private}_base) instead of // getreg. @@ -1231,7 +1233,7 @@ bool AMDGPULegalizerInfo::legalizeAddrSpaceCast( auto FlatNull = B.buildConstant(DstTy, TM.getNullPointerValue(DestAS)); - Register ApertureReg = getSegmentAperture(DestAS, MRI, B); + Register ApertureReg = getSegmentAperture(SrcAS, MRI, B); if (!ApertureReg.isValid()) return false; diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir index 9d1c7fb1011..a27040cebc4 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-addrspacecast.mir @@ -259,9 +259,9 @@ body: | ; VI: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1 ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; VI: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) - ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 + ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY2]], [[C2]](s64) - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4 from `i8 addrspace(4)* undef` + 68, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4 from `i8 addrspace(4)* undef` + 64, align 64, addrspace 4) ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](p3), [[C]] ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY1]](p3) ; VI: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32) @@ -271,7 +271,7 @@ body: | ; GFX9: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 ; GFX9: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1 ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 - ; GFX9: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 30735 + ; GFX9: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 31759 ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_]], [[C2]](s32) ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](p3), [[C]] @@ -465,16 +465,16 @@ body: | ; VI: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1 ; VI: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 ; VI: [[COPY2:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) - ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 + ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 ; VI: [[GEP:%[0-9]+]]:_(p4) = G_GEP [[COPY2]], [[C2]](s64) - ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4 from `i8 addrspace(4)* undef` + 68, addrspace 4) + ; VI: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[GEP]](p4) :: (dereferenceable invariant load 4 from `i8 addrspace(4)* undef` + 64, align 64, addrspace 4) ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C]] ; VI: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3) ; VI: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[LOAD]](s32) ; VI: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]] ; VI: [[COPY3:%[0-9]+]]:_(p4) = COPY [[COPY]](p4) ; VI: [[GEP1:%[0-9]+]]:_(p4) = G_GEP [[COPY3]], [[C2]](s64) - ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (dereferenceable invariant load 4 from `i8 addrspace(4)* undef` + 68, addrspace 4) + ; VI: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[GEP1]](p4) :: (dereferenceable invariant load 4 from `i8 addrspace(4)* undef` + 64, align 64, addrspace 4) ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C]] ; VI: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3) ; VI: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT1]](s32), [[LOAD1]](s32) @@ -486,14 +486,14 @@ body: | ; GFX9: [[UV:%[0-9]+]]:_(p3), [[UV1:%[0-9]+]]:_(p3) = G_UNMERGE_VALUES [[COPY]](<2 x p3>) ; GFX9: [[C:%[0-9]+]]:_(p3) = G_CONSTANT i32 -1 ; GFX9: [[C1:%[0-9]+]]:_(p0) = G_CONSTANT i64 0 - ; GFX9: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 30735 + ; GFX9: [[S_GETREG_B32_:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 31759 ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; GFX9: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_]], [[C2]](s32) ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV]](p3), [[C]] ; GFX9: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV]](p3) ; GFX9: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[PTRTOINT]](s32), [[SHL]](s32) ; GFX9: [[SELECT:%[0-9]+]]:_(p0) = G_SELECT [[ICMP]](s1), [[MV]], [[C1]] - ; GFX9: [[S_GETREG_B32_1:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 30735 + ; GFX9: [[S_GETREG_B32_1:%[0-9]+]]:sreg_32(s32) = S_GETREG_B32 31759 ; GFX9: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[S_GETREG_B32_1]], [[C2]](s32) ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[UV1]](p3), [[C]] ; GFX9: [[PTRTOINT1:%[0-9]+]]:_(s32) = G_PTRTOINT [[UV1]](p3)