AMDGPU: Convert image intrinsic uses in tests

author Matt Arsenault <Matthew.Arsenault@amd.com>
Tue, 21 Mar 2017 16:24:12 +0000 (16:24 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Tue, 21 Mar 2017 16:24:12 +0000 (16:24 +0000)
diff --git a/test/CodeGen/AMDGPU/coalescer-subrange-crash.ll b/test/CodeGen/AMDGPU/coalescer-subrange-crash.ll

index 24d0406b4c690b8abc856da82942a17e474050cb..ef1b3d25f88337a516bf1660474367b0b81425da 100644 (file)
--- a/test/CodeGen/AMDGPU/coalescer-subrange-crash.ll
+++ b/test/CodeGen/AMDGPU/coalescer-subrange-crash.ll
@@ -36,7 +36,8 @@ main_body:
    %tmp31 = insertelement <16 x i32> %tmp30, i32 undef, i32 6
    %tmp32 = insertelement <16 x i32> %tmp31, i32 undef, i32 7
    %tmp33 = insertelement <16 x i32> %tmp32, i32 undef, i32 8
-  %tmp34 = call <4 x float> @llvm.SI.image.sample.c.d.o.v16i32(<16 x i32> %tmp33, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %tmp33.bc = bitcast <16 x i32> %tmp33 to <16 x float>
+  %tmp34 = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v16f32.v8i32(<16 x float> %tmp33.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 true)
    %tmp35 = extractelement <4 x float> %tmp34, i32 0
    %tmp36 = bitcast float %tmp24 to i32
    %tmp37 = insertelement <16 x i32> <i32 212739, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>, i32 %tmp36, i32 1
@@ -47,7 +48,8 @@ main_body:
    %tmp42 = insertelement <16 x i32> %tmp41, i32 undef, i32 6
    %tmp43 = insertelement <16 x i32> %tmp42, i32 undef, i32 7
    %tmp44 = insertelement <16 x i32> %tmp43, i32 undef, i32 8
-  %tmp45 = call <4 x float> @llvm.SI.image.sample.c.d.o.v16i32(<16 x i32> %tmp44, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0)
+  %tmp44.bc = bitcast <16 x i32> %tmp44 to <16 x float>
+  %tmp45 = call <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v16f32.v8i32(<16 x float> %tmp44.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 true)
    %tmp46 = extractelement <4 x float> %tmp45, i32 0
    %tmp47 = fmul float %tmp35, %tmp46
    %tmp48 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, float %tmp47, 14
@@ -55,20 +57,10 @@ main_body:
    ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %tmp49
  }
  
-; Function Attrs: nounwind readnone
-declare float @llvm.SI.load.const(<16 x i8>, i32) #0
+declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
+declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.c.d.o.v4f32.v16f32.v8i32(<16 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
  
-; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.image.sample.c.d.o.v16i32(<16 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
-
-; Function Attrs: nounwind readnone
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
-
-; Function Attrs: nounwind readnone
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0
-
-; Function Attrs: nounwind readnone
-declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #0
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind readonly }
diff --git a/test/CodeGen/AMDGPU/commute-shifts.ll b/test/CodeGen/AMDGPU/commute-shifts.ll

index c90af6a35736cd6643c1f9b848190735258dad2a..84d8bf2bd706a36cbe4b574be4bbe5596f7203ab 100644 (file)
--- a/test/CodeGen/AMDGPU/commute-shifts.ll
+++ b/test/CodeGen/AMDGPU/commute-shifts.ll
@@ -7,7 +7,7 @@
  define amdgpu_ps float @main(float %arg0, float %arg1) #0 {
  bb:
    %tmp = fptosi float %arg0 to i32
-  %tmp1 = call <4 x float> @llvm.SI.image.load.v4i32(<4 x i32> undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp1 = call <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32> undef, <8 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false)
    %tmp2.f = extractelement <4 x float> %tmp1, i32 0
    %tmp2 = bitcast float %tmp2.f to i32
    %tmp3 = and i32 %tmp, 7
@@ -20,8 +20,9 @@ bb:
    ret float %tmp9
  }
  
-declare <4 x float> @llvm.SI.image.load.v4i32(<4 x i32>, <8 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
  declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
+declare <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
  
  attributes #0 = { nounwind }
  attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind readonly }
diff --git a/test/CodeGen/AMDGPU/else.ll b/test/CodeGen/AMDGPU/else.ll

index ef1e64763d4a5cc4ad695c3aac5e8446336cff79..22338e4f50e568c6e21dcd9b1e0470a8c2c08104 100644 (file)
--- a/test/CodeGen/AMDGPU/else.ll
+++ b/test/CodeGen/AMDGPU/else.ll
@@ -1,12 +1,12 @@
-; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
  
  ; CHECK-LABEL: {{^}}else_no_execfix:
  ; CHECK: ; %Flow
  ; CHECK-NEXT: s_or_saveexec_b64 [[DST:s\[[0-9]+:[0-9]+\]]],
  ; CHECK-NEXT: s_xor_b64 exec, exec, [[DST]]
  ; CHECK-NEXT: ; mask branch
-define amdgpu_ps float @else_no_execfix(i32 %z, float %v) {
+define amdgpu_ps float @else_no_execfix(i32 %z, float %v) #0 {
  main_body:
    %cc = icmp sgt i32 %z, 5
    br i1 %cc, label %if, label %else
@@ -33,7 +33,7 @@ end:
  ; CHECK-NEXT: s_and_b64 [[AND_INIT:s\[[0-9]+:[0-9]+\]]], exec, [[DST]]
  ; CHECK-NEXT: s_xor_b64 exec, exec, [[AND_INIT]]
  ; CHECK-NEXT: ; mask branch
-define amdgpu_ps void @else_execfix_leave_wqm(i32 %z, float %v) {
+define amdgpu_ps void @else_execfix_leave_wqm(i32 %z, float %v) #0 {
  main_body:
    %cc = icmp sgt i32 %z, 5
    br i1 %cc, label %if, label %else
@@ -44,8 +44,7 @@ if:
  
  else:
    %c = fmul float %v, 3.0
-  %c.i = bitcast float %c to i32
-  %tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %c.i, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %v.else = extractelement <4 x float> %tex, i32 0
    br label %end
  
@@ -55,6 +54,9 @@ end:
    ret void
  }
  
-declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) nounwind
+declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1
+declare <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
  
-declare <4 x float> @llvm.SI.image.sample.i32(i32, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) nounwind readnone
+attributes #0 = { nounwind }
+attributes #1 = { nounwind writeonly }
+attributes #2 = { nounwind readonly }
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll

index fd1a463fd3e999a9f67fb4bcd60a5b1997b4114e..f0af876567b493a63530161ff8e22c39fc966c15 100644 (file)
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK %s
  
  ; CHECK-LABEL: {{^}}test1:
  ; CHECK: v_cndmask_b32_e64 v0, 0, 1, exec
@@ -7,7 +7,7 @@
  ; there is no WQM use and therefore llvm.amdgcn.ps.live is constant. However,
  ; the expectation is that the intrinsic will be used in non-trivial shaders,
  ; so such an optimization doesn't seem worth the effort.
-define amdgpu_ps float @test1() {
+define amdgpu_ps float @test1() #0 {
    %live = call i1 @llvm.amdgcn.ps.live()
    %live.32 = zext i1 %live to i32
    %r = bitcast i32 %live.32 to float
@@ -19,12 +19,11 @@ define amdgpu_ps float @test1() {
  ; CHECK-DAG: s_wqm_b64 exec, exec
  ; CHECK-DAG: v_cndmask_b32_e64 [[VAR:v[0-9]+]], 0, 1, [[LIVE]]
  ; CHECK: image_sample v0, [[VAR]],
-define amdgpu_ps float @test2() {
+define amdgpu_ps float @test2() #0 {
    %live = call i1 @llvm.amdgcn.ps.live()
    %live.32 = zext i1 %live to i32
-
-  %t = call <4 x float> @llvm.SI.image.sample.i32(i32 %live.32, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
-
+  %live.32.bc = bitcast i32 %live.32 to float
+  %t = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %live.32.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %r = extractelement <4 x float> %t, i32 0
    ret float %r
  }
@@ -35,7 +34,7 @@ define amdgpu_ps float @test2() {
  ; CHECK-DAG: s_xor_b64 [[HELPER:s\[[0-9]+:[0-9]+\]]], [[LIVE]], -1
  ; CHECK_DAG: s_and_saveexec_b64 [[SAVED:s\[[0-9]+:[0-9]+\]]], [[HELPER]]
  ; CHECK: ; %dead
-define amdgpu_ps float @test3(i32 %in) {
+define amdgpu_ps float @test3(i32 %in) #0 {
  entry:
    %live = call i1 @llvm.amdgcn.ps.live()
    br i1 %live, label %end, label %dead
@@ -46,14 +45,15 @@ dead:
  
  end:
    %tc = phi i32 [ %in, %entry ], [ %tc.dead, %dead ]
-  %t = call <4 x float> @llvm.SI.image.sample.i32(i32 %tc, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
-
+  %tc.bc = bitcast i32 %tc to float
+  %t = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %tc.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
    %r = extractelement <4 x float> %t, i32 0
    ret float %r
  }
  
-declare i1 @llvm.amdgcn.ps.live() #0
-
-declare <4 x float> @llvm.SI.image.sample.i32(i32, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #0
+declare i1 @llvm.amdgcn.ps.live() #1
+declare <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
  
-attributes #0 = { nounwind readnone }
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind readonly }
diff --git a/test/CodeGen/AMDGPU/sgpr-copy.ll b/test/CodeGen/AMDGPU/sgpr-copy.ll

index 59267f8b5c6e61b636920a90889cc608db9c9239..79bb5aff8f4d035eaf495278fb3daa1d9b13a08d 100644 (file)
--- a/test/CodeGen/AMDGPU/sgpr-copy.ll
+++ b/test/CodeGen/AMDGPU/sgpr-copy.ll
@@ -86,8 +86,9 @@ main_body:
    %tmp47 = insertelement <2 x i32> undef, i32 %tmp45, i32 0
    %tmp48 = insertelement <2 x i32> %tmp47, i32 %tmp46, i32 1
    %tmp39.bc = bitcast <16 x i8> %tmp39 to <4 x i32>
-  %tmp49 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp48, <8 x i32> %tmp37, <4 x i32> %tmp39.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
-  %tmp50 = extractelement <4 x float> %tmp49, i32 2
+  %a.bc.i = bitcast <2 x i32> %tmp48 to <2 x float>
+  %tmp1 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i, <8 x i32> %tmp37, <4 x i32> %tmp39.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+  %tmp50 = extractelement <4 x float> %tmp1, i32 2
    %tmp51 = call float @llvm.fabs.f32(float %tmp50)
    %tmp52 = fmul float %p2.i18, %p2.i18
    %tmp53 = fmul float %p2.i12, %p2.i12
@@ -239,17 +240,17 @@ entry:
    br i1 %tmp27, label %if, label %else
  
  if:                                               ; preds = %entry
-  %val.if = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 11, i32 13>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
-  %val.if.0 = extractelement <4 x float> %val.if, i32 0
-  %val.if.1 = extractelement <4 x float> %val.if, i32 1
-  %val.if.2 = extractelement <4 x float> %val.if, i32 2
+  %tmp1 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 0x36D6000000000000, float 0x36DA000000000000>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+  %val.if.0 = extractelement <4 x float> %tmp1, i32 0
+  %val.if.1 = extractelement <4 x float> %tmp1, i32 1
+  %val.if.2 = extractelement <4 x float> %tmp1, i32 2
    br label %endif
  
  else:                                             ; preds = %entry
-  %val.else = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 5, i32 7>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
-  %val.else.0 = extractelement <4 x float> %val.else, i32 0
-  %val.else.1 = extractelement <4 x float> %val.else, i32 1
-  %val.else.2 = extractelement <4 x float> %val.else, i32 2
+  %tmp2 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 0x36C4000000000000, float 0x36CC000000000000>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+  %val.else.0 = extractelement <4 x float> %tmp2, i32 0
+  %val.else.1 = extractelement <4 x float> %tmp2, i32 1
+  %val.else.2 = extractelement <4 x float> %tmp2, i32 2
    br label %endif
  
  endif:                                            ; preds = %else, %if
@@ -356,7 +357,8 @@ bb38:                                             ; preds = %bb
    %tmp54 = insertelement <2 x i32> undef, i32 %tmp52, i32 0
    %tmp55 = insertelement <2 x i32> %tmp54, i32 %tmp53, i32 1
    %tmp56 = bitcast <8 x i32> %tmp26 to <8 x i32>
-  %tmp58 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp55, <8 x i32> %tmp56, <4 x i32> %tmp28, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %a.bc.i = bitcast <2 x i32> %tmp55 to <2 x float>
+  %tmp2 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i, <8 x i32> %tmp56, <4 x i32> %tmp28, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    br label %bb71
  
  bb80:                                             ; preds = %bb
@@ -366,11 +368,12 @@ bb80:                                             ; preds = %bb
    %tmp83 = insertelement <2 x i32> undef, i32 %tmp81, i32 0
    %tmp84 = insertelement <2 x i32> %tmp83, i32 %tmp82.2, i32 1
    %tmp85 = bitcast <8 x i32> %tmp26 to <8 x i32>
-  %tmp87 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp84, <8 x i32> %tmp85, <4 x i32> %tmp28, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %a.bc.i1 = bitcast <2 x i32> %tmp84 to <2 x float>
+  %tmp3 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i1, <8 x i32> %tmp85, <4 x i32> %tmp28, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    br label %bb71
  
  bb71:                                             ; preds = %bb80, %bb38
-  %tmp72 = phi <4 x float> [ %tmp58, %bb38 ], [ %tmp87, %bb80 ]
+  %tmp72 = phi <4 x float> [ %tmp2, %bb38 ], [ %tmp3, %bb80 ]
    %tmp88 = extractelement <4 x float> %tmp72, i32 0
    call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %tmp88, float %tmp88, float %tmp88, float %tmp88, i1 true, i1 true) #0
    ret void
@@ -384,8 +387,8 @@ bb:
    %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
    %tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %arg, i32 0, i32 %tid
    %tmp8 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp7, align 32, !tbaa !0
-  %tmp9 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 1061158912, i32 1048576000>, <8 x i32> %tmp8, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
-  %tmp10 = extractelement <4 x float> %tmp9, i32 0
+  %tmp = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 7.500000e-01, float 2.500000e-01>, <8 x i32> %tmp8, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+  %tmp10 = extractelement <4 x float> %tmp, i32 0
    %tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %tmp10)
    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) #0
    ret void
@@ -399,8 +402,8 @@ bb:
    %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
    %tmp7 = getelementptr [17 x <4 x i32>], [17 x <4 x i32>] addrspace(2)* %arg, i32 0, i32 %tid
    %tmp8 = load <4 x i32>, <4 x i32> addrspace(2)* %tmp7, align 16, !tbaa !0
-  %tmp9 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 1061158912, i32 1048576000>, <8 x i32> undef, <4 x i32> %tmp8, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
-  %tmp10 = extractelement <4 x float> %tmp9, i32 0
+  %tmp = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 7.500000e-01, float 2.500000e-01>, <8 x i32> undef, <4 x i32> %tmp8, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+  %tmp10 = extractelement <4 x float> %tmp, i32 0
    %tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp10, float undef)
    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) #0
    ret void
@@ -416,12 +419,12 @@ declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
  declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
  declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
  declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-
-declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
  declare float @llvm.SI.load.const(<16 x i8>, i32) #1
  
  attributes #0 = { nounwind }
  attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind readonly }
  
  !0 = !{!1, !1, i64 0, i32 1}
  !1 = !{!"const", !2}
diff --git a/test/CodeGen/AMDGPU/si-lod-bias.ll b/test/CodeGen/AMDGPU/si-lod-bias.ll

index d64a0f3a166c46ab9e9feadc07d7921fe75af30a..3a7359ea4ffaf196971ebe8d1bda57bf49d3dfc4 100644 (file)
--- a/test/CodeGen/AMDGPU/si-lod-bias.ll
+++ b/test/CodeGen/AMDGPU/si-lod-bias.ll
@@ -35,7 +35,8 @@ main_body:
    %tmp33 = insertelement <4 x i32> %tmp32, i32 %tmp30, i32 2
    %tmp34 = insertelement <4 x i32> %tmp33, i32 undef, i32 3
    %tmp25.bc = bitcast <16 x i8> %tmp25 to <4 x i32>
-  %tmp35 = call <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32> %tmp34, <8 x i32> %tmp23, <4 x i32> %tmp25.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp34.bc = bitcast <4 x i32> %tmp34 to <4 x float>
+  %tmp35 = call <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float> %tmp34.bc, <8 x i32> %tmp23, <4 x i32> %tmp25.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %tmp36 = extractelement <4 x float> %tmp35, i32 0
    %tmp37 = extractelement <4 x float> %tmp35, i32 1
    %tmp38 = extractelement <4 x float> %tmp35, i32 2
@@ -47,12 +48,12 @@ main_body:
  declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
  declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
  declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-
+declare <4 x float> @llvm.amdgcn.image.sample.b.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
  declare float @llvm.SI.load.const(<16 x i8>, i32) #1
-declare <4 x float> @llvm.SI.image.sample.b.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
  
  attributes #0 = { nounwind }
  attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind readonly }
  
  !0 = !{!1, !1, i64 0, i32 1}
  !1 = !{!"const", !2}
diff --git a/test/CodeGen/AMDGPU/si-scheduler.ll b/test/CodeGen/AMDGPU/si-scheduler.ll

index 08f7972cbb1052b6d710cff84fe7795112686abb..462528c4ff1a8ad556c37382f3d40e7e58986c9f 100644 (file)
--- a/test/CodeGen/AMDGPU/si-scheduler.ll
+++ b/test/CodeGen/AMDGPU/si-scheduler.ll
@@ -40,7 +40,9 @@ main_body:
    %tmp30 = insertelement <2 x i32> %tmp29, i32 %tmp28, i32 1
    %tmp22.bc = bitcast <32 x i8> %tmp22 to <8 x i32>
    %tmp24.bc = bitcast <16 x i8> %tmp24 to <4 x i32>
-  %tmp31 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp30, <8 x i32> %tmp22.bc, <4 x i32> %tmp24.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp30.bc = bitcast <2 x i32> %tmp30 to <2 x float>
+  %tmp31 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp30.bc, <8 x i32> %tmp22.bc, <4 x i32> %tmp24.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+
    %tmp32 = extractelement <4 x float> %tmp31, i32 0
    %tmp33 = extractelement <4 x float> %tmp31, i32 1
    %tmp34 = extractelement <4 x float> %tmp31, i32 2
@@ -54,12 +56,12 @@ main_body:
  declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
  declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
  declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
-
-declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
  declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
+declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
  
  attributes #0 = { nounwind }
  attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind readonly }
  
  !0 = !{!1, !1, i64 0, i32 1}
  !1 = !{!"const", !2}
diff --git a/test/CodeGen/AMDGPU/si-sgpr-spill.ll b/test/CodeGen/AMDGPU/si-sgpr-spill.ll

index bcaaba7240337156d8ef4c559ddf486752b5bcb9..8731e74d63a057d4c6b9b0473c3d70e0fdffcbff 100644 (file)
--- a/test/CodeGen/AMDGPU/si-sgpr-spill.ll
+++ b/test/CodeGen/AMDGPU/si-sgpr-spill.ll
@@ -364,7 +364,8 @@ ENDIF:                                            ; preds = %LOOP
    %tmp240 = insertelement <8 x i32> %tmp239, i32 %tmp238, i32 5
    %tmp241 = insertelement <8 x i32> %tmp240, i32 undef, i32 6
    %tmp242 = insertelement <8 x i32> %tmp241, i32 undef, i32 7
-  %tmp243 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp242, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp242.bc = bitcast <8 x i32> %tmp242 to <8 x float>
+  %tmp243 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp242.bc, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %tmp244 = extractelement <4 x float> %tmp243, i32 3
    %tmp245 = fcmp oge float %temp30.0, %tmp244
    %tmp246 = sext i1 %tmp245 to i32
@@ -410,7 +411,8 @@ IF67:                                             ; preds = %LOOP65
    %tmp275 = insertelement <8 x i32> %tmp274, i32 undef, i32 6
    %tmp276 = insertelement <8 x i32> %tmp275, i32 undef, i32 7
    %tmp67.bc = bitcast <16 x i8> %tmp67 to <4 x i32>
-  %tmp277 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp276, <8 x i32> %tmp65, <4 x i32> %tmp67.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp276.bc = bitcast <8 x i32> %tmp276 to <8 x float>
+  %tmp277 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp276.bc, <8 x i32> %tmp65, <4 x i32> %tmp67.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %tmp278 = extractelement <4 x float> %tmp277, i32 0
    %tmp279 = extractelement <4 x float> %tmp277, i32 1
    %tmp280 = extractelement <4 x float> %tmp277, i32 2
@@ -431,7 +433,8 @@ IF67:                                             ; preds = %LOOP65
    %tmp295 = insertelement <8 x i32> %tmp294, i32 undef, i32 6
    %tmp296 = insertelement <8 x i32> %tmp295, i32 undef, i32 7
    %tmp83.bc = bitcast <16 x i8> %tmp83 to <4 x i32>
-  %tmp297 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp296, <8 x i32> %tmp81, <4 x i32> %tmp83.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp296.bc = bitcast <8 x i32> %tmp296 to <8 x float>
+  %tmp297 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp296.bc, <8 x i32> %tmp81, <4 x i32> %tmp83.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %tmp298 = extractelement <4 x float> %tmp297, i32 0
    %tmp299 = extractelement <4 x float> %tmp297, i32 1
    %tmp300 = extractelement <4 x float> %tmp297, i32 2
@@ -450,7 +453,8 @@ IF67:                                             ; preds = %LOOP65
    %tmp313 = insertelement <8 x i32> %tmp312, i32 undef, i32 6
    %tmp314 = insertelement <8 x i32> %tmp313, i32 undef, i32 7
    %tmp79.bc = bitcast <16 x i8> %tmp79 to <4 x i32>
-  %tmp315 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp314, <8 x i32> %tmp77, <4 x i32> %tmp79.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp314.bc = bitcast <8 x i32> %tmp314 to <8 x float>
+  %tmp315 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp314.bc, <8 x i32> %tmp77, <4 x i32> %tmp79.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %tmp316 = extractelement <4 x float> %tmp315, i32 0
    %tmp317 = extractelement <4 x float> %tmp315, i32 1
    %tmp318 = extractelement <4 x float> %tmp315, i32 2
@@ -480,7 +484,8 @@ IF67:                                             ; preds = %LOOP65
    %tmp342 = insertelement <8 x i32> %tmp341, i32 %tmp336, i32 5
    %tmp343 = insertelement <8 x i32> %tmp342, i32 undef, i32 6
    %tmp344 = insertelement <8 x i32> %tmp343, i32 undef, i32 7
-  %tmp345 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp344, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp344.bc = bitcast <8 x i32> %tmp344 to <8 x float>
+  %tmp345 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp344.bc, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %tmp346 = extractelement <4 x float> %tmp345, i32 0
    %tmp347 = extractelement <4 x float> %tmp345, i32 1
    %tmp348 = extractelement <4 x float> %tmp345, i32 2
@@ -511,7 +516,8 @@ IF67:                                             ; preds = %LOOP65
    %tmp373 = insertelement <8 x i32> %tmp372, i32 undef, i32 6
    %tmp374 = insertelement <8 x i32> %tmp373, i32 undef, i32 7
    %tmp71.bc = bitcast <16 x i8> %tmp71 to <4 x i32>
-  %tmp375 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp374, <8 x i32> %tmp69, <4 x i32> %tmp71.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp374.bc = bitcast <8 x i32> %tmp374 to <8 x float>
+  %tmp375 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp374.bc, <8 x i32> %tmp69, <4 x i32> %tmp71.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %tmp376 = extractelement <4 x float> %tmp375, i32 0
    %tmp377 = extractelement <4 x float> %tmp375, i32 1
    %tmp378 = extractelement <4 x float> %tmp375, i32 2
@@ -566,7 +572,8 @@ IF67:                                             ; preds = %LOOP65
    %tmp427 = insertelement <8 x i32> %tmp426, i32 undef, i32 6
    %tmp428 = insertelement <8 x i32> %tmp427, i32 undef, i32 7
    %tmp87.bc = bitcast <16 x i8> %tmp87 to <4 x i32>
-  %tmp429 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp428, <8 x i32> %tmp85, <4 x i32> %tmp87.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp428.bc = bitcast <8 x i32> %tmp428 to <8 x float>
+  %tmp429 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp428.bc, <8 x i32> %tmp85, <4 x i32> %tmp87.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %tmp430 = extractelement <4 x float> %tmp429, i32 0
    %tmp431 = extractelement <4 x float> %tmp429, i32 1
    %tmp432 = extractelement <4 x float> %tmp429, i32 2
@@ -618,7 +625,8 @@ IF67:                                             ; preds = %LOOP65
    %tmp468 = insertelement <4 x i32> %tmp467, i32 %tmp465, i32 2
    %tmp469 = insertelement <4 x i32> %tmp468, i32 undef, i32 3
    %tmp91.bc = bitcast <16 x i8> %tmp91 to <4 x i32>
-  %tmp470 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %tmp469, <8 x i32> %tmp89, <4 x i32> %tmp91.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp469.bc = bitcast <4 x i32> %tmp469 to <4 x float>
+  %tmp470 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tmp469.bc, <8 x i32> %tmp89, <4 x i32> %tmp91.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
    %tmp471 = extractelement <4 x float> %tmp470, i32 0
    %tmp472 = extractelement <4 x float> %tmp470, i32 1
    %tmp473 = extractelement <4 x float> %tmp470, i32 2
@@ -720,7 +728,8 @@ IF67:                                             ; preds = %LOOP65
    %tmp569 = insertelement <8 x i32> %tmp568, i32 undef, i32 6
    %tmp570 = insertelement <8 x i32> %tmp569, i32 undef, i32 7
    %tmp75.bc = bitcast <16 x i8> %tmp75 to <4 x i32>
-  %tmp571 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp570, <8 x i32> %tmp73, <4 x i32> %tmp75.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp570.bc = bitcast <8 x i32> %tmp570 to <8 x float>
+  %tmp571 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp570.bc, <8 x i32> %tmp73, <4 x i32> %tmp75.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %tmp572 = extractelement <4 x float> %tmp571, i32 0
    %tmp573 = extractelement <4 x float> %tmp571, i32 1
    %tmp574 = extractelement <4 x float> %tmp571, i32 2
@@ -742,7 +751,8 @@ ENDIF66:                                          ; preds = %LOOP65
    %tmp588 = insertelement <8 x i32> %tmp587, i32 %tmp586, i32 5
    %tmp589 = insertelement <8 x i32> %tmp588, i32 undef, i32 6
    %tmp590 = insertelement <8 x i32> %tmp589, i32 undef, i32 7
-  %tmp591 = call <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32> %tmp590, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp590.bc = bitcast <8 x i32> %tmp590 to <8 x float>
+  %tmp591 = call <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float> %tmp590.bc, <8 x i32> %tmp61, <4 x i32> %tmp63.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %tmp592 = extractelement <4 x float> %tmp591, i32 3
    %tmp593 = fcmp oge float %temp30.1, %tmp592
    %tmp594 = sext i1 %tmp593 to i32
@@ -768,7 +778,7 @@ ENDIF66:                                          ; preds = %LOOP65
  ; GCN-LABEL: {{^}}main1:
  ; GCN: s_endpgm
  ; TOVGPR: ScratchSize: 0{{$}}
-define amdgpu_ps void @main1([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) {
+define amdgpu_ps void @main1([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
  main_body:
    %tmp = getelementptr [17 x <16 x i8>], [17 x <16 x i8>] addrspace(2)* %arg, i64 0, i32 0
    %tmp21 = load <16 x i8>, <16 x i8> addrspace(2)* %tmp, !tbaa !0
@@ -1135,7 +1145,8 @@ main_body:
    %tmp223 = insertelement <2 x i32> undef, i32 %tmp221, i32 0
    %tmp224 = insertelement <2 x i32> %tmp223, i32 %tmp222, i32 1
    %tmp132.bc = bitcast <16 x i8> %tmp132 to <4 x i32>
-  %tmp225 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp224, <8 x i32> %tmp130, <4 x i32> %tmp132.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp224.bc = bitcast <2 x i32> %tmp224 to <2 x float>
+  %tmp225 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp224.bc, <8 x i32> %tmp130, <4 x i32> %tmp132.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %tmp226 = extractelement <4 x float> %tmp225, i32 0
    %tmp227 = extractelement <4 x float> %tmp225, i32 1
    %tmp228 = extractelement <4 x float> %tmp225, i32 2
@@ -1208,7 +1219,8 @@ LOOP:                                             ; preds = %LOOP, %main_body
    %tmp280 = insertelement <4 x i32> %tmp279, i32 0, i32 2
    %tmp281 = insertelement <4 x i32> %tmp280, i32 undef, i32 3
    %tmp148.bc = bitcast <16 x i8> %tmp148 to <4 x i32>
-  %tmp282 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %tmp281, <8 x i32> %tmp146, <4 x i32> %tmp148.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp281.bc = bitcast <4 x i32> %tmp281 to <4 x float>
+  %tmp282 = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> %tmp281.bc, <8 x i32> %tmp146, <4 x i32> %tmp148.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %tmp283 = extractelement <4 x float> %tmp282, i32 3
    %tmp284 = fadd float %temp168.0, %tmp273
    %tmp285 = fadd float %temp169.0, %tmp274
@@ -1272,11 +1284,12 @@ IF189:                                            ; preds = %LOOP
    %tmp340 = insertelement <2 x i32> undef, i32 %tmp338, i32 0
    %tmp341 = insertelement <2 x i32> %tmp340, i32 %tmp339, i32 1
    %tmp136.bc = bitcast <16 x i8> %tmp136 to <4 x i32>
-  %tmp342 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp341, <8 x i32> %tmp134, <4 x i32> %tmp136.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
-  %tmp343 = extractelement <4 x float> %tmp342, i32 0
-  %tmp344 = extractelement <4 x float> %tmp342, i32 1
-  %tmp345 = extractelement <4 x float> %tmp342, i32 2
-  %tmp346 = extractelement <4 x float> %tmp342, i32 3
+  %a.bc.i = bitcast <2 x i32> %tmp341 to <2 x float>
+  %tmp0 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i, <8 x i32> %tmp134, <4 x i32> %tmp136.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+  %tmp343 = extractelement <4 x float> %tmp0, i32 0
+  %tmp344 = extractelement <4 x float> %tmp0, i32 1
+  %tmp345 = extractelement <4 x float> %tmp0, i32 2
+  %tmp346 = extractelement <4 x float> %tmp0, i32 3
    %tmp347 = fmul float %tmp343, %tmp22
    %tmp348 = fmul float %tmp344, %tmp23
    %tmp349 = fmul float %tmp345, %tmp24
@@ -1305,8 +1318,9 @@ IF189:                                            ; preds = %LOOP
    %tmp360 = insertelement <2 x i32> undef, i32 %tmp358, i32 0
    %tmp361 = insertelement <2 x i32> %tmp360, i32 %tmp359, i32 1
    %tmp152.bc = bitcast <16 x i8> %tmp152 to <4 x i32>
-  %tmp362 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp361, <8 x i32> %tmp150, <4 x i32> %tmp152.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
-  %tmp363 = extractelement <4 x float> %tmp362, i32 2
+  %a.bc.i3 = bitcast <2 x i32> %tmp361 to <2 x float>
+  %tmp1 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i3, <8 x i32> %tmp150, <4 x i32> %tmp152.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+  %tmp363 = extractelement <4 x float> %tmp1, i32 2
    %tmp364 = fmul float %result.i40, %result.i
    %tmp365 = fmul float %result.i36, %result.i44
    %tmp366 = fmul float %result.i32, %result.i42
@@ -1316,11 +1330,12 @@ IF189:                                            ; preds = %LOOP
    %tmp370 = insertelement <2 x i32> undef, i32 %tmp368, i32 0
    %tmp371 = insertelement <2 x i32> %tmp370, i32 %tmp369, i32 1
    %tmp140.bc = bitcast <16 x i8> %tmp140 to <4 x i32>
-  %tmp372 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp371, <8 x i32> %tmp138, <4 x i32> %tmp140.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
-  %tmp373 = extractelement <4 x float> %tmp372, i32 0
-  %tmp374 = extractelement <4 x float> %tmp372, i32 1
-  %tmp375 = extractelement <4 x float> %tmp372, i32 2
-  %tmp376 = extractelement <4 x float> %tmp372, i32 3
+  %a.bc.i2 = bitcast <2 x i32> %tmp371 to <2 x float>
+  %tmp2 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i2, <8 x i32> %tmp138, <4 x i32> %tmp140.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+  %tmp373 = extractelement <4 x float> %tmp2, i32 0
+  %tmp374 = extractelement <4 x float> %tmp2, i32 1
+  %tmp375 = extractelement <4 x float> %tmp2, i32 2
+  %tmp376 = extractelement <4 x float> %tmp2, i32 3
    %tmp377 = fcmp olt float 0.000000e+00, %tmp375
    %tmp378 = sext i1 %tmp377 to i32
    %tmp379 = bitcast i32 %tmp378 to float
@@ -1333,11 +1348,12 @@ IF189:                                            ; preds = %LOOP
    %tmp384 = insertelement <2 x i32> undef, i32 %tmp382, i32 0
    %tmp385 = insertelement <2 x i32> %tmp384, i32 %tmp383, i32 1
    %tmp144.bc = bitcast <16 x i8> %tmp144 to <4 x i32>
-  %tmp386 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp385, <8 x i32> %tmp142, <4 x i32> %tmp144.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
-  %tmp387 = extractelement <4 x float> %tmp386, i32 0
-  %tmp388 = extractelement <4 x float> %tmp386, i32 1
-  %tmp389 = extractelement <4 x float> %tmp386, i32 2
-  %tmp390 = extractelement <4 x float> %tmp386, i32 3
+  %a.bc.i1 = bitcast <2 x i32> %tmp385 to <2 x float>
+  %tmp3 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %a.bc.i1, <8 x i32> %tmp142, <4 x i32> %tmp144.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+  %tmp387 = extractelement <4 x float> %tmp3, i32 0
+  %tmp388 = extractelement <4 x float> %tmp3, i32 1
+  %tmp389 = extractelement <4 x float> %tmp3, i32 2
+  %tmp390 = extractelement <4 x float> %tmp3, i32 3
    %tmp391 = fcmp olt float 0.000000e+00, %tmp389
    %tmp392 = sext i1 %tmp391 to i32
    %tmp393 = bitcast i32 %tmp392 to float
@@ -1431,7 +1447,8 @@ ENDIF197:                                         ; preds = %IF198, %IF189
    %tmp468 = insertelement <2 x i32> undef, i32 %tmp466, i32 0
    %tmp469 = insertelement <2 x i32> %tmp468, i32 %tmp467, i32 1
    %tmp160.bc = bitcast <16 x i8> %tmp160 to <4 x i32>
-  %tmp470 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp469, <8 x i32> %tmp158, <4 x i32> %tmp160.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp469.bc = bitcast <2 x i32> %tmp469 to <2 x float>
+  %tmp470 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp469.bc, <8 x i32> %tmp158, <4 x i32> %tmp160.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %tmp471 = extractelement <4 x float> %tmp470, i32 0
    %tmp472 = extractelement <4 x float> %tmp470, i32 1
    %tmp473 = extractelement <4 x float> %tmp470, i32 2
@@ -1449,7 +1466,8 @@ ENDIF197:                                         ; preds = %IF198, %IF189
    %tmp485 = insertelement <2 x i32> undef, i32 %tmp483, i32 0
    %tmp486 = insertelement <2 x i32> %tmp485, i32 %tmp484, i32 1
    %tmp156.bc = bitcast <16 x i8> %tmp156 to <4 x i32>
-  %tmp487 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %tmp486, <8 x i32> %tmp154, <4 x i32> %tmp156.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp486.bc = bitcast <2 x i32> %tmp486 to <2 x float>
+  %tmp487 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> %tmp486.bc, <8 x i32> %tmp154, <4 x i32> %tmp156.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %tmp488 = extractelement <4 x float> %tmp487, i32 0
    %tmp489 = extractelement <4 x float> %tmp487, i32 1
    %tmp490 = extractelement <4 x float> %tmp487, i32 2
@@ -1657,7 +1675,8 @@ ENDIF209:                                         ; preds = %ELSE214, %ELSE211,
    %tmp658 = insertelement <4 x i32> %tmp657, i32 %tmp655, i32 2
    %tmp659 = insertelement <4 x i32> %tmp658, i32 undef, i32 3
    %tmp128.bc = bitcast <16 x i8> %tmp128 to <4 x i32>
-  %tmp660 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %tmp659, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp659.bc = bitcast <4 x i32> %tmp659 to <4 x float>
+  %tmp660 = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> %tmp659.bc, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %tmp661 = extractelement <4 x float> %tmp660, i32 0
    %tmp662 = extractelement <4 x float> %tmp660, i32 1
    %tmp663 = bitcast float %tmp646 to i32
@@ -1667,7 +1686,8 @@ ENDIF209:                                         ; preds = %ELSE214, %ELSE211,
    %tmp667 = insertelement <4 x i32> %tmp666, i32 %tmp664, i32 1
    %tmp668 = insertelement <4 x i32> %tmp667, i32 %tmp665, i32 2
    %tmp669 = insertelement <4 x i32> %tmp668, i32 undef, i32 3
-  %tmp670 = call <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32> %tmp669, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp669.bc = bitcast <4 x i32> %tmp669 to <4 x float>
+  %tmp670 = call <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float> %tmp669.bc, <8 x i32> %tmp126, <4 x i32> %tmp128.bc, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %tmp671 = extractelement <4 x float> %tmp670, i32 0
    %tmp672 = extractelement <4 x float> %tmp670, i32 1
    %tmp673 = fsub float -0.000000e+00, %tmp662
@@ -1845,15 +1865,15 @@ declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #1
  declare i32 @llvm.amdgcn.mbcnt.hi(i32, i32) #1
  declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
  declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-
-declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.l.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.d.v8i32(<8 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
+declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
+declare <4 x float> @llvm.amdgcn.image.sample.d.v4f32.v8f32.v8i32(<8 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
+declare <4 x float> @llvm.amdgcn.image.sample.l.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
  declare float @llvm.SI.load.const(<16 x i8>, i32) #1
  
  attributes #0 = { nounwind }
  attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind readonly }
  
  !0 = !{!1, !1, i64 0, i32 1}
  !1 = !{!"const", !2}
diff --git a/test/CodeGen/AMDGPU/skip-if-dead.ll b/test/CodeGen/AMDGPU/skip-if-dead.ll

index 60cee7a3499e3b914189c68658a2eacdee603d0b..3f53572ab44033373743f8be476e73566fbb53e1 100644 (file)
--- a/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -357,7 +357,7 @@ bb7:                                              ; preds = %bb4
  ; CHECK: [[END]]:
  ; CHECK: s_or_b64 exec, exec
  ; CHECK: s_endpgm
-define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, <4 x i32> %arg2) #0 {
+define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, <4 x float> %arg2) #0 {
  bb:
    %tmp = fcmp ult float %arg1, 0.000000e+00
    br i1 %tmp, label %bb3, label %bb4
@@ -367,7 +367,7 @@ bb3:                                              ; preds = %bb
    br label %bb4
  
  bb4:                                              ; preds = %bb3, %bb
-  %tmp5 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %arg2, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %arg2, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %tmp6 = extractelement <4 x float> %tmp5, i32 0
    %tmp7 = fcmp une float %tmp6, 0.000000e+00
    br i1 %tmp7, label %bb8, label %bb9
@@ -380,9 +380,8 @@ bb9:                                              ; preds = %bb4
    ret void
  }
  
+declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
  declare void @llvm.AMDGPU.kill(float) #0
-declare <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) nounwind
  
  attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+attributes #1 = { nounwind readonly }
diff --git a/test/CodeGen/AMDGPU/split-smrd.ll b/test/CodeGen/AMDGPU/split-smrd.ll

index 09d1f5703c0caad6b5b30569711eae3f05f72d82..cdb1b1e3b5032de3a5b3b5c1adfdd6f35914bdff 100644 (file)
--- a/test/CodeGen/AMDGPU/split-smrd.ll
+++ b/test/CodeGen/AMDGPU/split-smrd.ll
@@ -21,21 +21,21 @@ bb3:                                              ; preds = %bb
    %tmp6 = sext i32 %tmp5 to i64
    %tmp7 = getelementptr [34 x <8 x i32>], [34 x <8 x i32>] addrspace(2)* %arg, i64 0, i64 %tmp6
    %tmp8 = load <8 x i32>, <8 x i32> addrspace(2)* %tmp7, align 32, !tbaa !0
-  %tmp9 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 1061158912, i32 1048576000>, <8 x i32> %tmp8, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp9 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float bitcast (i32 1061158912 to float), float bitcast (i32 1048576000 to float)>, <8 x i32> %tmp8, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %tmp10 = extractelement <4 x float> %tmp9, i32 0
    %tmp12 = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %tmp10, float undef)
    call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %tmp12, <2 x half> undef, i1 true, i1 true) #0
    ret void
  }
  
-declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
  declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) #1
-
+declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0
+declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
  declare float @llvm.SI.load.const(<16 x i8>, i32) #1
-declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
  
  attributes #0 = { nounwind }
  attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind readonly }
  
  !0 = !{!1, !1, i64 0, i32 1}
  !1 = !{!"const", !2}
diff --git a/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll b/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll

index 70a150a6bad4e09ab26c315137a6cc20081df34a..0ec45c68bb769f691e88e5d30213814cf877fe60 100644 (file)
--- a/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
+++ b/test/CodeGen/AMDGPU/subreg-coalescer-crash.ll
@@ -2,7 +2,7 @@
  ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
  
  ; GCN-LABEL:{{^}}row_filter_C1_D0:
-define void @row_filter_C1_D0() {
+define void @row_filter_C1_D0() #0 {
  entry:
    br i1 undef, label %for.inc.1, label %do.body.preheader
  
@@ -65,7 +65,7 @@ bb7:                                              ; preds = %bb6
    br label %bb4
  
  bb9:                                              ; preds = %bb2
-  %tmp10 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp10 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %tmp11 = extractelement <4 x float> %tmp10, i32 1
    %tmp12 = extractelement <4 x float> %tmp10, i32 3
    br label %bb14
@@ -95,8 +95,9 @@ bb27:                                             ; preds = %bb24
    br label %bb14
  }
  
+
  declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0
-declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
  
  attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+attributes #1 = { nounwind readonly }
diff --git a/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll b/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll

index 4902e9a3cafb2409d104c468d230c41853e92726..f92a847b3e55f4b9ae8d4e50b635869e5d2966e4 100644 (file)
--- a/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
+++ b/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
@@ -35,7 +35,8 @@ bb:
    %tmp1 = load volatile i32, i32 addrspace(1)* undef, align 4
    %tmp2 = insertelement <4 x i32> undef, i32 %tmp1, i32 0
    %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
-  %tmp4 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %tmp3, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp3.cast = bitcast <4 x i32> %tmp3 to <4 x float>
+  %tmp4 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tmp3.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %tmp5 = extractelement <4 x float> %tmp4, i32 0
    %tmp6 = fmul float %tmp5, undef
    %tmp7 = fadd float %tmp6, %tmp6
@@ -83,8 +84,7 @@ define void @partially_undef_copy() #0 {
    ret void
  }
  
-declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-declare float @llvm.SI.image.sample.i32(i32, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #1
  
  attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
+attributes #1 = { nounwind readonly }
diff --git a/test/CodeGen/AMDGPU/unigine-liveness-crash.ll b/test/CodeGen/AMDGPU/unigine-liveness-crash.ll

index 44d88ddc45de9d5678cd5f2b5708d7aaac821a97..853131baed5e750e026abcfe4a7798512eb4dc68 100644 (file)
--- a/test/CodeGen/AMDGPU/unigine-liveness-crash.ll
+++ b/test/CodeGen/AMDGPU/unigine-liveness-crash.ll
@@ -17,7 +17,8 @@ main_body:
    %j.f.i = bitcast i32 %j.i to float
    %p1.i = call float @llvm.amdgcn.interp.p1(float %i.f.i, i32 3, i32 4, i32 %arg6) #2
    %p2.i = call float @llvm.amdgcn.interp.p2(float %p1.i, float %j.f.i, i32 3, i32 4, i32 %arg6) #2
-  %tmp23 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp23 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
+
    %tmp24 = extractelement <4 x float> %tmp23, i32 3
    %tmp25 = fmul float %tmp24, undef
    %tmp26 = fmul float undef, %p2.i
@@ -26,14 +27,15 @@ main_body:
    %tmp29 = insertelement <4 x i32> undef, i32 %tmp28, i32 0
    %tmp30 = insertelement <4 x i32> %tmp29, i32 0, i32 1
    %tmp31 = insertelement <4 x i32> %tmp30, i32 undef, i32 2
-  %tmp32 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %tmp31, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp31.cast = bitcast <4 x i32> %tmp31 to <4 x float>
+  %tmp32 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %tmp31.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %tmp33 = extractelement <4 x float> %tmp32, i32 0
    %tmp34 = fadd float undef, %tmp33
    %tmp35 = fadd float %tmp34, undef
    %tmp36 = fadd float %tmp35, undef
    %tmp37 = fadd float %tmp36, undef
    %tmp38 = fadd float %tmp37, undef
-  %tmp39 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp39 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %tmp40 = extractelement <4 x float> %tmp39, i32 0
    %tmp41 = extractelement <4 x float> %tmp39, i32 1
    %tmp42 = extractelement <4 x float> %tmp39, i32 2
@@ -50,7 +52,8 @@ main_body:
    %tmp53 = insertelement <4 x i32> undef, i32 %tmp50, i32 0
    %tmp54 = insertelement <4 x i32> %tmp53, i32 %tmp51, i32 1
    %tmp55 = insertelement <4 x i32> %tmp54, i32 %tmp52, i32 2
-  %tmp56 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %tmp55, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp55.cast = bitcast <4 x i32> %tmp55 to <4 x float>
+  %tmp56 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %tmp55.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %tmp57 = extractelement <4 x float> %tmp56, i32 0
    %tmp58 = fadd float %tmp38, %tmp57
    %tmp59 = fadd float undef, %tmp46
@@ -59,7 +62,8 @@ main_body:
    %tmp62 = bitcast float %tmp60 to i32
    %tmp63 = insertelement <4 x i32> undef, i32 %tmp61, i32 1
    %tmp64 = insertelement <4 x i32> %tmp63, i32 %tmp62, i32 2
-  %tmp65 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %tmp64, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp64.cast = bitcast <4 x i32> %tmp64 to <4 x float>
+  %tmp65 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %tmp64.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %tmp66 = extractelement <4 x float> %tmp65, i32 0
    %tmp67 = fadd float %tmp58, %tmp66
    %tmp68 = fmul float %tmp67, 1.250000e-01
@@ -99,33 +103,22 @@ IF29:                                             ; preds = %LOOP
  ENDIF28:                                          ; preds = %LOOP
    %tmp85 = insertelement <4 x i32> %tmp72, i32 undef, i32 1
    %tmp86 = insertelement <4 x i32> %tmp85, i32 undef, i32 2
-  %tmp87 = call <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32> %tmp86, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tmp86.cast = bitcast <4 x i32> %tmp86 to <4 x float>
+  %tmp87 = call <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float> %tmp86.cast, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false)
    %tmp88 = extractelement <4 x float> %tmp87, i32 0
    %tmp89 = fadd float undef, %tmp88
    br label %LOOP
  }
  
-; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-
-; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-
-; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.image.sample.c.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
-
-; Function Attrs: nounwind readnone
-declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
-
-; Function Attrs: nounwind readnone
-declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
-
-; Function Attrs: nounwind readnone
  declare float @llvm.minnum.f32(float, float) #1
-
-; Function Attrs: nounwind readnone
  declare float @llvm.maxnum.f32(float, float) #1
+declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #1
+declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #1
+declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
+declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
+declare <4 x float> @llvm.amdgcn.image.sample.c.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #2
  
  attributes #0 = { nounwind "InitialPSInputAddr"="36983" "target-cpu"="tonga" }
  attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind }
+attributes #2 = { nounwind readonly }
+attributes #3 = { nounwind }
diff --git a/test/CodeGen/AMDGPU/wqm.ll b/test/CodeGen/AMDGPU/wqm.ll

index e06116ce7344f49980a6b1591a26c87558f81cd6..9f277b2c9a59da7620e1ddfae46888e595528caf 100644 (file)
--- a/test/CodeGen/AMDGPU/wqm.ll
+++ b/test/CodeGen/AMDGPU/wqm.ll
@@ -18,9 +18,9 @@ main_body:
  ;CHECK-NEXT: ; %main_body
  ;CHECK-NEXT: s_wqm_b64 exec, exec
  ;CHECK-NOT: exec
-define amdgpu_ps void @test2(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <4 x i32> %c) {
+define amdgpu_ps void @test2(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <4 x float> %c) {
  main_body:
-  %c.1 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %c.1 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
    %c.2 = bitcast <4 x float> %c.1 to <4 x i32>
    %c.3 = extractelement <4 x i32> %c.2, i32 0
    %gep = getelementptr float, float addrspace(1)* %ptr, i32 %c.3
@@ -40,9 +40,9 @@ main_body:
  ;CHECK: store
  ;CHECK-NOT: exec
  ;CHECK: .size test3
-define amdgpu_ps <4 x float> @test3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, <4 x i32> %c) {
+define amdgpu_ps <4 x float> @test3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, <4 x float> %c) {
  main_body:
-  %tex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
    %tex.1 = bitcast <4 x float> %tex to <4 x i32>
    %tex.2 = extractelement <4 x i32> %tex.1, i32 0
  
@@ -68,10 +68,9 @@ main_body:
    %c.1 = mul i32 %c, %d
  
    call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> undef, <4 x i32> undef, i32 %c.1, i32 0, i1 0, i1 0)
-
-  %tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %c.1, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
-  %tex.1 = bitcast <4 x float> %tex to <4 x i32>
-  %dtex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %tex.1, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %c.1.bc = bitcast i32 %c.1 to float
+  %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.1.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+  %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
    ret <4 x float> %dtex
  }
  
@@ -99,9 +98,9 @@ main_body:
    br i1 %cmp, label %IF, label %ELSE
  
  IF:
-  %tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
-  %tex.1 = bitcast <4 x float> %tex to <4 x i32>
-  %dtex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %tex.1, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %c.bc = bitcast i32 %c to float
+  %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+  %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
    %data.if = extractelement <4 x float> %dtex, i32 0
    br label %END
  
@@ -141,9 +140,9 @@ main_body:
    br i1 %cmp, label %ELSE, label %IF
  
  IF:
-  %tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
-  %tex.1 = bitcast <4 x float> %tex to <4 x i32>
-  %dtex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %tex.1, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %c.bc = bitcast i32 %c to float
+  %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+  %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
    %data.if = extractelement <4 x float> %dtex, i32 0
    br label %END
  
@@ -198,7 +197,8 @@ ELSE:
  
  END:
    %coord.END = phi i32 [ %coord.IF, %IF ], [ %coord.ELSE, %ELSE ]
-  %tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %coord.END, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %coord.END.bc = bitcast i32 %coord.END to float
+  %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord.END.bc, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
    ret <4 x float> %tex
  }
  
@@ -213,13 +213,11 @@ END:
  ;CHECK: image_sample
  ;CHECK: v_cmp
  ;CHECK: store
-define amdgpu_ps float @test_control_flow_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %idx, i32 %coord) {
+define amdgpu_ps float @test_control_flow_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %idx, float %coord) {
  main_body:
-  %tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
-  %tex.1 = bitcast <4 x float> %tex to <4 x i32>
-  %dtex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %tex.1, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+  %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
    %dtex.1 = extractelement <4 x float> %dtex, i32 0
-
    call void @llvm.amdgcn.buffer.store.f32(float %dtex.1, <4 x i32> undef, i32 %idx, i32 0, i1 0, i1 0)
  
    %cc = fcmp ogt float %dtex.1, 0.0
@@ -252,7 +250,7 @@ END:
  ;CHECK: %END
  ;CHECK: image_sample
  ;CHECK: image_sample
-define amdgpu_ps <4 x float> @test_control_flow_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %coord, i32 %y, float %z) {
+define amdgpu_ps <4 x float> @test_control_flow_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float %coord, i32 %y, float %z) {
  main_body:
    %cond = icmp eq i32 %y, 0
    br i1 %cond, label %IF, label %END
@@ -263,9 +261,8 @@ IF:
    br label %END
  
  END:
-  %tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
-  %tex.1 = bitcast <4 x float> %tex to <4 x i32>
-  %dtex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %tex.1, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+  %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
    ret <4 x float> %dtex
  }
  
@@ -284,10 +281,9 @@ END:
  ;CHECK: buffer_store_dword
  ;CHECK: s_mov_b64 exec, [[SAVE]]
  ;CHECK: image_sample
-define amdgpu_ps <4 x float> @test_kill_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <2 x i32> %idx, <2 x float> %data, i32 %coord, i32 %coord2, float %z) {
+define amdgpu_ps <4 x float> @test_kill_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <2 x i32> %idx, <2 x float> %data, float %coord, float %coord2, float %z) {
  main_body:
-  %tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
-
+  %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
    %idx.0 = extractelement <2 x i32> %idx, i32 0
    %data.0 = extractelement <2 x float> %data, i32 0
    call void @llvm.amdgcn.buffer.store.f32(float %data.0, <4 x i32> undef, i32 %idx.0, i32 0, i1 0, i1 0)
@@ -297,10 +293,8 @@ main_body:
    %idx.1 = extractelement <2 x i32> %idx, i32 1
    %data.1 = extractelement <2 x float> %data, i32 1
    call void @llvm.amdgcn.buffer.store.f32(float %data.1, <4 x i32> undef, i32 %idx.1, i32 0, i1 0, i1 0)
-
-  %tex2 = call <4 x float> @llvm.SI.image.sample.i32(i32 %coord2, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
-  %tex2.1 = bitcast <4 x float> %tex2 to <4 x i32>
-  %dtex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %tex2.1, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tex2 = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord2, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+  %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex2, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
    %out = fadd <4 x float> %tex, %dtex
  
    ret <4 x float> %out
@@ -318,11 +312,10 @@ main_body:
  ; CHECK: buffer_store_dword
  ; CHECK-NOT: wqm
  ; CHECK: v_cmpx_
-define amdgpu_ps <4 x float> @test_kill_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %idx, float %data, i32 %coord, i32 %coord2, float %z) {
+define amdgpu_ps <4 x float> @test_kill_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %idx, float %data, float %coord, float %coord2, float %z) {
  main_body:
-  %tex = call <4 x float> @llvm.SI.image.sample.i32(i32 %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
-  %tex.1 = bitcast <4 x float> %tex to <4 x i32>
-  %dtex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %tex.1, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %coord, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+  %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
  
    call void @llvm.amdgcn.buffer.store.f32(float %data, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
  
@@ -373,8 +366,7 @@ loop:
    br i1 %cc, label %break, label %body
  
  body:
-  %c.i = bitcast <4 x float> %c.iv to <4 x i32>
-  %c.next = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %c.i, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %c.next = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %c.iv, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
    %ctr.next = fadd float %ctr.iv, 2.0
    br label %loop
  
@@ -414,9 +406,8 @@ entry:
  
    %c.gep = getelementptr [32 x i32], [32 x i32]* %array, i32 0, i32 %idx
    %c = load i32, i32* %c.gep, align 4
-
-  %t = call <4 x float> @llvm.SI.image.sample.i32(i32 %c, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
-
+  %c.bc = bitcast i32 %c to float
+  %t = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float %c.bc, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
    call void @llvm.amdgcn.buffer.store.v4f32(<4 x float> %t, <4 x i32> undef, i32 0, i32 0, i1 0, i1 0)
  
    ret void
@@ -434,9 +425,8 @@ entry:
  ; CHECK: s_and_b64 exec, exec, [[LIVE]]
  ; CHECK-NOT: exec
  define amdgpu_ps <4 x float> @test_nonvoid_return() nounwind {
-  %tex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
-  %tex.i = bitcast <4 x float> %tex to <4 x i32>
-  %dtex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %tex.i, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+  %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
    ret <4 x float> %dtex
  }
  
@@ -448,10 +438,8 @@ define amdgpu_ps <4 x float> @test_nonvoid_return() nounwind {
  ; CHECK-NOT: exec
  define amdgpu_ps <4 x float> @test_nonvoid_return_unreachable(i32 inreg %c) nounwind {
  entry:
-  %tex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
-  %tex.i = bitcast <4 x float> %tex to <4 x i32>
-  %dtex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %tex.i, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
-
+  %tex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
+  %dtex = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float> %tex, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
    %cc = icmp sgt i32 %c, 0
    br i1 %cc, label %if, label %else
  
@@ -483,33 +471,28 @@ main_body:
    br i1 %cc, label %if, label %else
  
  if:
-  %r.if = call <4 x float> @llvm.SI.image.sample.i32(i32 0, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %r.if = call <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float 0.0, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
    br label %end
  
  else:
-  %r.else = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 0, i32 1>, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %r.else = call <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float> <float 0.0, float bitcast (i32 1 to float)>, <8 x i32> undef, <4 x i32> undef, i32 15, i1 false, i1 false, i1 false, i1 false, i1 false) #0
    br label %end
  
  end:
    %r = phi <4 x float> [ %r.if, %if ], [ %r.else, %else ]
-
    call void @llvm.amdgcn.buffer.store.f32(float 1.0, <4 x i32> undef, i32 %idx, i32 0, i1 0, i1 0)
-
    ret <4 x float> %r
  }
  
  declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
  declare void @llvm.amdgcn.image.store.v4f32.v4i32.v8i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
-declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1
-declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #1
-
-declare <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #2
-declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #2
-
-declare <4 x float> @llvm.SI.image.sample.i32(i32, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3
-declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3
-declare <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #3
-
+declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #2
+declare void @llvm.amdgcn.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i1, i1) #2
+declare <4 x float> @llvm.amdgcn.image.load.v4f32.v4i32.v8i32(<4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #3
+declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #3
+declare <4 x float> @llvm.amdgcn.image.sample.v4f32.f32.v8i32(float, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3
+declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v2f32.v8i32(<2 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3
+declare <4 x float> @llvm.amdgcn.image.sample.v4f32.v4f32.v8i32(<4 x float>, <8 x i32>, <4 x i32>, i32, i1, i1, i1, i1, i1) #3
  declare void @llvm.AMDGPU.kill(float) #1
  
  attributes #1 = { nounwind }
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Tue, 21 Mar 2017 16:24:12 +0000 (16:24 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Tue, 21 Mar 2017 16:24:12 +0000 (16:24 +0000)
test/CodeGen/AMDGPU/coalescer-subrange-crash.ll		patch | blob | history
test/CodeGen/AMDGPU/commute-shifts.ll		patch | blob | history
test/CodeGen/AMDGPU/else.ll		patch | blob | history
test/CodeGen/AMDGPU/llvm.amdgcn.ps.live.ll		patch | blob | history
test/CodeGen/AMDGPU/sgpr-copy.ll		patch | blob | history
test/CodeGen/AMDGPU/si-lod-bias.ll		patch | blob | history
test/CodeGen/AMDGPU/si-scheduler.ll		patch | blob | history
test/CodeGen/AMDGPU/si-sgpr-spill.ll		patch | blob | history
test/CodeGen/AMDGPU/skip-if-dead.ll		patch | blob | history
test/CodeGen/AMDGPU/split-smrd.ll		patch | blob | history
test/CodeGen/AMDGPU/subreg-coalescer-crash.ll		patch | blob | history
test/CodeGen/AMDGPU/undefined-subreg-liverange.ll		patch | blob | history
test/CodeGen/AMDGPU/unigine-liveness-crash.ll		patch | blob | history
test/CodeGen/AMDGPU/wqm.ll		patch | blob | history