granicus.if.org Git - llvm/commitdiff
AMDGPU/SI: Define an intrinsic to expose ds_swizzle_b32
author: Changpeng Fang <changpeng.fang@gmail.com>
Wed, 22 Jun 2016 21:33:49 +0000 (21:33 +0000)
committer: Changpeng Fang <changpeng.fang@gmail.com>
Wed, 22 Jun 2016 21:33:49 +0000 (21:33 +0000)
Reviewers: tstellarAMD, arsenm

Differential Revision: http://reviews.llvm.org/D21533

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@273496 91177308-0d34-0410-b5e6-96231b3b80d8

include/llvm/IR/IntrinsicsAMDGPU.td
lib/Target/AMDGPU/SIInstructions.td
test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll [new file with mode: 0644]

index 48e215e232f1a55c762e2a1fb73d56c345fe4606..51afb83d7c6557a8bbe31bd5fa182b5c51b4c94d 100644 (file)
@@ -373,6 +373,11 @@ def int_amdgcn_mbcnt_hi :
   GCCBuiltin<"__builtin_amdgcn_mbcnt_hi">,
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
 
+// llvm.amdgcn.ds.swizzle(i32 src, i32 offset) -> i32; IntrNoMem, convergent.
+def int_amdgcn_ds_swizzle :
+  GCCBuiltin<"__builtin_amdgcn_ds_swizzle">,
+  Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
+
 //===----------------------------------------------------------------------===//
 // CI+ Intrinsics
 //===----------------------------------------------------------------------===//
index 6f7159cf36679f8d8a66005ddde2f36169bba8c0..814cbd3093df4850f697735a40b60a75c30d3ce9 100644 (file)
@@ -823,7 +823,11 @@ defm DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VGPR_32, "ds_cmps
 defm DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">;
 defm DS_MIN_RTN_F32 : DS_1A2D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">;
 defm DS_MAX_RTN_F32 : DS_1A2D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">;
+
+let Uses = [EXEC], mayLoad = 0, mayStore = 0, isConvergent = 1 in {
 defm DS_SWIZZLE_B32 : DS_1A_RET <0x35, "ds_swizzle_b32", VGPR_32>;
+} // End Uses = [EXEC], mayLoad = 0, mayStore = 0, isConvergent = 1
+
 let mayStore = 0 in {
 defm DS_READ_B32 : DS_1A_RET <0x36, "ds_read_b32", VGPR_32>;
 defm DS_READ2_B32 : DS_1A_Off8_RET <0x37, "ds_read2_b32", VReg_64>;
@@ -2338,6 +2342,14 @@ def : Pat <
   (S_GETREG_B32 (as_i16imm $simm16))
 >;
 
+//===----------------------------------------------------------------------===//
+// DS_SWIZZLE Intrinsic Pattern.
+//===----------------------------------------------------------------------===//
+def : Pat <
+  (int_amdgcn_ds_swizzle i32:$src, imm:$offset16),
+  (DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0))
+>;
+
 //===----------------------------------------------------------------------===//
 // SMRD Patterns
 //===----------------------------------------------------------------------===//
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll
new file mode 100644 (file)
index 0000000..ef3cb00
--- /dev/null
@@ -0,0 +1,17 @@
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s
+
+declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #0
+
+; The constant second operand (100) must be emitted as the instruction's
+; offset, and the ds_swizzle_b32 must be followed by an s_waitcnt on lgkmcnt.
+; CHECK-LABEL: {{^}}ds_swizzle:
+; CHECK: ds_swizzle_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:100
+; CHECK: s_waitcnt lgkmcnt
+define void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) nounwind {
+  %swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
+  store i32 %swizzle, i32 addrspace(1)* %out, align 4
+  ret void
+}
+
+attributes #0 = { nounwind readnone convergent }