From: Matt Arsenault Date: Wed, 15 Mar 2017 23:15:12 +0000 (+0000) Subject: AMDGPU: Allow sinking of addressing modes for atomic_inc/dec X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d0064ed89eb28fc3a9fbf9277ac6e42c212ab276;p=llvm AMDGPU: Allow sinking of addressing modes for atomic_inc/dec git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@297913 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 117c22ac2d5..4085b929147 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -61,6 +61,7 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Type.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CodeGen.h" @@ -505,6 +506,13 @@ const SISubtarget *SITargetLowering::getSubtarget() const { // TargetLowering queries //===----------------------------------------------------------------------===// +bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &, + EVT) const { + // SI has some legal vector types, but no legal vector operations. Say no + // shuffles are legal in order to prefer scalarizing some vector operations. + return false; +} + bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &CI, unsigned IntrID) const { @@ -524,11 +532,20 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, } } -bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &, - EVT) const { - // SI has some legal vector types, but no legal vector operations. Say no - // shuffles are legal in order to prefer scalarizing some vector operations. 
- return false; +bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II, + SmallVectorImpl<Value*> &Ops, + Type *&AccessTy) const { + switch (II->getIntrinsicID()) { + case Intrinsic::amdgcn_atomic_inc: + case Intrinsic::amdgcn_atomic_dec: { + Value *Ptr = II->getArgOperand(0); + AccessTy = II->getType(); + Ops.push_back(Ptr); + return true; + } + default: + return false; + } } bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const { diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h index 984640dfdb0..bc982458cd8 100644 --- a/lib/Target/AMDGPU/SIISelLowering.h +++ b/lib/Target/AMDGPU/SIISelLowering.h @@ -123,11 +123,15 @@ public: const SISubtarget *getSubtarget() const; + bool isShuffleMaskLegal(const SmallVectorImpl<int> &/*Mask*/, + EVT /*VT*/) const override; + bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &, unsigned IntrinsicID) const override; - bool isShuffleMaskLegal(const SmallVectorImpl<int> &/*Mask*/, - EVT /*VT*/) const override; + bool getAddrModeArguments(IntrinsicInst * /*I*/, + SmallVectorImpl<Value*> &/*Ops*/, + Type *&/*AccessTy*/) const override; bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS) const override; diff --git a/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/test/CodeGen/AMDGPU/cgp-addressing-modes.ll index f7ce3fc09fc..b14797c7897 100644 --- a/test/CodeGen/AMDGPU/cgp-addressing-modes.ll +++ b/test/CodeGen/AMDGPU/cgp-addressing-modes.ll @@ -622,7 +622,62 @@ done: ret void } +; OPT-LABEL: @test_sink_local_small_offset_atomic_inc_i32( +; OPT: %sunkaddr = ptrtoint i32 addrspace(3)* %in to i32 +; OPT: %sunkaddr1 = add i32 %sunkaddr, 28 +; OPT: %sunkaddr2 = inttoptr i32 %sunkaddr1 to i32 addrspace(3)* +; OPT: %tmp1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %sunkaddr2, i32 2) +define void @test_sink_local_small_offset_atomic_inc_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) { +entry: + %out.gep = getelementptr i32, i32 
addrspace(3)* %out, i32 999999 + %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7 + %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 + %tmp0 = icmp eq i32 %tid, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %in.gep, i32 2) + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(3)* %out.gep + br label %done + +done: + ret void +} + +; OPT-LABEL: @test_sink_local_small_offset_atomic_dec_i32( +; OPT: %sunkaddr = ptrtoint i32 addrspace(3)* %in to i32 +; OPT: %sunkaddr1 = add i32 %sunkaddr, 28 +; OPT: %sunkaddr2 = inttoptr i32 %sunkaddr1 to i32 addrspace(3)* +; OPT: %tmp1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %sunkaddr2, i32 2) +define void @test_sink_local_small_offset_atomic_dec_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) { +entry: + %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999 + %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7 + %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0 + %tmp0 = icmp eq i32 %tid, 0 + br i1 %tmp0, label %endif, label %if + +if: + %tmp1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %in.gep, i32 2) + br label %endif + +endif: + %x = phi i32 [ %tmp1, %if ], [ 0, %entry ] + store i32 %x, i32 addrspace(3)* %out.gep + br label %done + +done: + ret void +} + declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0 +declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32) #2 +declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32) #2 attributes #0 = { nounwind readnone } attributes #1 = { nounwind } +attributes #2 = { nounwind argmemonly }