AMDGPU: Allow sinking of addressing modes for atomic_inc/dec

author Matt Arsenault <Matthew.Arsenault@amd.com>
Wed, 15 Mar 2017 23:15:12 +0000 (23:15 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Wed, 15 Mar 2017 23:15:12 +0000 (23:15 +0000)

diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp

index 117c22ac2d59ef5708b29679a96ad6e89d183416..4085b9291477ed6ef901699c11141438da8bd1af 100644 (file)
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -61,6 +61,7 @@
  #include "llvm/IR/InstrTypes.h"
  #include "llvm/IR/Instruction.h"
  #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
  #include "llvm/IR/Type.h"
  #include "llvm/Support/Casting.h"
  #include "llvm/Support/CodeGen.h"
@@ -505,6 +506,13 @@ const SISubtarget *SITargetLowering::getSubtarget() const {
  // TargetLowering queries
  //===----------------------------------------------------------------------===//
  
+bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &,
+                                          EVT) const {
+  // SI has some legal vector types, but no legal vector operations. Say no
+  // shuffles are legal in order to prefer scalarizing some vector operations.
+  return false;
+}
+
  bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                            const CallInst &CI,
                                            unsigned IntrID) const {
@@ -524,11 +532,20 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    }
  }
  
-bool SITargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &,
-                                          EVT) const {
-  // SI has some legal vector types, but no legal vector operations. Say no
-  // shuffles are legal in order to prefer scalarizing some vector operations.
-  return false;
+bool SITargetLowering::getAddrModeArguments(IntrinsicInst *II,
+                                            SmallVectorImpl<Value*> &Ops,
+                                            Type *&AccessTy) const {
+  switch (II->getIntrinsicID()) {
+  case Intrinsic::amdgcn_atomic_inc:
+  case Intrinsic::amdgcn_atomic_dec: {
+    Value *Ptr = II->getArgOperand(0);
+    AccessTy = II->getType();
+    Ops.push_back(Ptr);
+    return true;
+  }
+  default:
+    return false;
+  }
  }
  
  bool SITargetLowering::isLegalFlatAddressingMode(const AddrMode &AM) const {
diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h

index 984640dfdb027713c890bba9aae0cab021fa56a2..bc982458cd818c6d6c21d91dc078cb25b8168d14 100644 (file)
--- a/lib/Target/AMDGPU/SIISelLowering.h
+++ b/lib/Target/AMDGPU/SIISelLowering.h
@@ -123,11 +123,15 @@ public:
  
    const SISubtarget *getSubtarget() const;
  
+  bool isShuffleMaskLegal(const SmallVectorImpl<int> &/*Mask*/,
+                          EVT /*VT*/) const override;
+
    bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
                            unsigned IntrinsicID) const override;
  
-  bool isShuffleMaskLegal(const SmallVectorImpl<int> &/*Mask*/,
-                          EVT /*VT*/) const override;
+  bool getAddrModeArguments(IntrinsicInst * /*I*/,
+                            SmallVectorImpl<Value*> &/*Ops*/,
+                            Type *&/*AccessTy*/) const override;
  
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
                               unsigned AS) const override;
diff --git a/test/CodeGen/AMDGPU/cgp-addressing-modes.ll b/test/CodeGen/AMDGPU/cgp-addressing-modes.ll

index f7ce3fc09fc605905f5ed10325275c49c0e28a76..b14797c78976db73b056de1600e79ae0300da848 100644 (file)
--- a/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
+++ b/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
@@ -622,7 +622,62 @@ done:
    ret void
  }
  
+; OPT-LABEL: @test_sink_local_small_offset_atomic_inc_i32(
+; OPT: %sunkaddr = ptrtoint i32 addrspace(3)* %in to i32
+; OPT: %sunkaddr1 = add i32 %sunkaddr, 28
+; OPT: %sunkaddr2 = inttoptr i32 %sunkaddr1 to i32 addrspace(3)*
+; OPT: %tmp1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %sunkaddr2, i32 2)
+define void @test_sink_local_small_offset_atomic_inc_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) {
+entry:
+  %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999
+  %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
+  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+  %tmp0 = icmp eq i32 %tid, 0
+  br i1 %tmp0, label %endif, label %if
+
+if:
+  %tmp1 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %in.gep, i32 2)
+  br label %endif
+
+endif:
+  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+  store i32 %x, i32 addrspace(3)* %out.gep
+  br label %done
+
+done:
+  ret void
+}
+
+; OPT-LABEL: @test_sink_local_small_offset_atomic_dec_i32(
+; OPT: %sunkaddr = ptrtoint i32 addrspace(3)* %in to i32
+; OPT: %sunkaddr1 = add i32 %sunkaddr, 28
+; OPT: %sunkaddr2 = inttoptr i32 %sunkaddr1 to i32 addrspace(3)*
+; OPT: %tmp1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %sunkaddr2, i32 2)
+define void @test_sink_local_small_offset_atomic_dec_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) {
+entry:
+  %out.gep = getelementptr i32, i32 addrspace(3)* %out, i32 999999
+  %in.gep = getelementptr i32, i32 addrspace(3)* %in, i32 7
+  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
+  %tmp0 = icmp eq i32 %tid, 0
+  br i1 %tmp0, label %endif, label %if
+
+if:
+  %tmp1 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %in.gep, i32 2)
+  br label %endif
+
+endif:
+  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
+  store i32 %x, i32 addrspace(3)* %out.gep
+  br label %done
+
+done:
+  ret void
+}
+
  declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
+declare i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* nocapture, i32) #2
+declare i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* nocapture, i32) #2
  
  attributes #0 = { nounwind readnone }
  attributes #1 = { nounwind }
+attributes #2 = { nounwind argmemonly }
Files changed:
lib/Target/AMDGPU/SIISelLowering.cpp
lib/Target/AMDGPU/SIISelLowering.h
test/CodeGen/AMDGPU/cgp-addressing-modes.ll