AMDGPU: Don't require structured CFG

author Matt Arsenault <Matthew.Arsenault@amd.com>

Tue, 6 Dec 2016 01:02:51 +0000 (01:02 +0000)

committer Matt Arsenault <Matthew.Arsenault@amd.com>

Tue, 6 Dec 2016 01:02:51 +0000 (01:02 +0000)
author Matt Arsenault <Matthew.Arsenault@amd.com>
Tue, 6 Dec 2016 01:02:51 +0000 (01:02 +0000)
committer Matt Arsenault <Matthew.Arsenault@amd.com>
Tue, 6 Dec 2016 01:02:51 +0000 (01:02 +0000)
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

index 7287b56aa6da698a01a333ff565e65f217ac93c5..e1fd95d09174554b6bc2a2ec104839cfdc80257a 100644 (file)
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -162,7 +162,6 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
                        FS, Options, getEffectiveRelocModel(RM), CM, OptLevel),
      TLOF(createTLOF(getTargetTriple())),
      IntrinsicInfo() {
-  setRequiresStructuredCFG(true);
    initAsmInfo();
  }
  
@@ -191,7 +190,9 @@ R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
                                       TargetOptions Options,
                                       Optional<Reloc::Model> RM,
                                       CodeModel::Model CM, CodeGenOpt::Level OL)
-  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
+  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
+  setRequiresStructuredCFG(true);
+}
  
  const R600Subtarget *R600TargetMachine::getSubtargetImpl(
    const Function &F) const {
diff --git a/test/CodeGen/AMDGPU/basic-branch.ll b/test/CodeGen/AMDGPU/basic-branch.ll

index 83313ed5327c1b7720ba03547439d6792e105bc1..24874ee7fa982fd16f5dab1391544cecf0bd92aa 100644 (file)
--- a/test/CodeGen/AMDGPU/basic-branch.ll
+++ b/test/CodeGen/AMDGPU/basic-branch.ll
@@ -8,13 +8,15 @@
  ; GCNNOOPT: v_writelane_b32
  ; GCN: s_cbranch_scc1 [[END:BB[0-9]+_[0-9]+]]
  
+
  ; GCN: ; BB#1
  ; GCNNOOPT: v_readlane_b32
  ; GCNNOOPT: v_readlane_b32
  ; GCN: buffer_store_dword
-; GCN: s_endpgm
+; GCNOPT-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; TODO: This waitcnt can be eliminated
  
-; GCN: {{^}}[[END]]
+; GCN: {{^}}[[END]]:
  ; GCN: s_endpgm
  define void @test_branch(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i32 %val) #0 {
    %cmp = icmp ne i32 %val, 0
@@ -35,9 +37,10 @@ end:
  ; GCN: s_cbranch_vccnz [[END:BB[0-9]+_[0-9]+]]
  
  ; GCN: buffer_store_dword
-; GCN: s_endpgm
+; GCNOPT-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; TODO: This waitcnt can be eliminated
  
-; GCN: {{^}}[[END]]
+; GCN: {{^}}[[END]]:
  ; GCN: s_endpgm
  define void @test_brcc_i1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in, i1 %val) #0 {
    %cmp0 = icmp ne i1 %val, 0
diff --git a/test/CodeGen/AMDGPU/br_cc.f16.ll b/test/CodeGen/AMDGPU/br_cc.f16.ll

index 6cf3fdad3e3ff5b6b95fc47f56c4a9fe4e66ee4c..970260412c44a7624b0194528aa9c7e46e519f1e 100644 (file)
--- a/test/CodeGen/AMDGPU/br_cc.f16.ll
+++ b/test/CodeGen/AMDGPU/br_cc.f16.ll
@@ -12,9 +12,10 @@
  ; GCN: s_cbranch_vccnz
  
  ; GCN: one{{$}}
-; SI:  v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[A_F32]]
-; GCN: buffer_store_short v[[A_F16]]
-; GCN: s_endpgm
+; SI: v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[A_F32]]
+; SI: s_branch
+; VI: buffer_store_short
+; VI: s_endpgm
  
  ; GCN: two{{$}}
  ; SI:  v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[B_F32]]
@@ -47,17 +48,19 @@ two:
  ; SI:  v_cvt_f32_f16_e32 v[[B_F32:[0-9]+]], v[[B_F16]]
  ; SI:  v_cmp_ngt_f32_e32 vcc, v[[B_F32]], v[[A_F32]]
  ; VI:  v_cmp_nle_f16_e32 vcc, v[[A_F16]], v[[B_F16]]
-; GCN: s_cbranch_vccnz
+; SI: s_cbranch_vccz
+; VI: s_cbranch_vccnz
  
-; GCN: one{{$}}
-; VI:  v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x3800{{$}}
-; GCN: buffer_store_short v[[A_F16]]
-; GCN: s_endpgm
+; VI: one{{$}}
+; VI: v_mov_b32_e32 v[[A_F16:[0-9]+]], 0x380{{0|1}}{{$}}
  
  ; GCN: two{{$}}
  ; SI:  v_cvt_f16_f32_e32 v[[B_F16:[0-9]+]], v[[B_F32]]
-; GCN: buffer_store_short v[[B_F16]]
-; GCN: s_endpgm
+
+; SI: one{{$}}
+; SI: buffer_store_short v[[A_F16]]
+; SI: s_endpgm
+
  define void @br_cc_f16_imm_a(
      half addrspace(1)* %r,
      half addrspace(1)* %b) {
@@ -87,8 +90,6 @@ two:
  
  ; GCN: one{{$}}
  ; SI:  v_cvt_f16_f32_e32 v[[A_F16:[0-9]+]], v[[A_F32]]
-; GCN: buffer_store_short v[[A_F16]]
-; GCN: s_endpgm
  
  ; GCN: two{{$}}
  ; VI:  v_mov_b32_e32 v[[B_F16:[0-9]+]], 0x3800{{$}}
diff --git a/test/CodeGen/AMDGPU/branch-relaxation.ll b/test/CodeGen/AMDGPU/branch-relaxation.ll

index 92debd8b927de8ec35e43987399d71382809a0a2..39505404a8681b291cfa3f458ce39378cf74b358 100644 (file)
--- a/test/CodeGen/AMDGPU/branch-relaxation.ll
+++ b/test/CodeGen/AMDGPU/branch-relaxation.ll
@@ -475,14 +475,13 @@ ret:
  
  ; GCN-LABEL: {{^}}long_branch_hang:
  ; GCN: s_cmp_lt_i32 s{{[0-9]+}}, 6
-; GCN-NEXT: s_cbranch_scc1 [[LONG_BR_0:BB[0-9]+_[0-9]+]]
-; GCN-NEXT: s_branch  [[SHORTB:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: s_cbranch_scc0 [[LONG_BR_0:BB[0-9]+_[0-9]+]]
+; GCN-NEXT: BB{{[0-9]+_[0-9]+}}:
  
-; GCN-NEXT: [[LONG_BR_0]]:
  ; GCN: s_add_u32 vcc_lo, vcc_lo, [[LONG_BR_DEST0:BB[0-9]+_[0-9]+]]-(
  ; GCN: s_setpc_b64
  
-; GCN: [[SHORTB]]:
+; GCN-NEXT: [[LONG_BR_0]]:
  ; GCN-DAG: v_cmp_lt_i32
  ; GCN-DAG: v_cmp_gt_i32
  ; GCN: s_cbranch_vccnz
@@ -492,7 +491,6 @@ ret:
  
  ; GCN: [[LONG_BR_DEST0]]
  ; GCN: v_cmp_ne_u32_e32
-; GCN-NEXT: ; implicit-def
  ; GCN-NEXT: s_cbranch_vccz
  ; GCN: s_setpc_b64
  
diff --git a/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/test/CodeGen/AMDGPU/indirect-addressing-si.ll

index 3a933306c64e3b9250e275dcbf307b32713c3f29..528e12b76ce046d1daba56ad86f22fdaeb0a2a9c 100644 (file)
--- a/test/CodeGen/AMDGPU/indirect-addressing-si.ll
+++ b/test/CodeGen/AMDGPU/indirect-addressing-si.ll
@@ -506,11 +506,13 @@ bb:
  bb1:
    %tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
    %tmp3 = extractelement <4 x float> %tmp2, i32 undef
+  call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp2) #0 ; Prevent block optimize out
    br label %bb7
  
  bb4:
    %tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
    %tmp6 = extractelement <4 x float> %tmp5, i32 undef
+  call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp5) #0 ; Prevent block optimize out
    br label %bb7
  
  bb7:
@@ -554,11 +556,13 @@ bb:
  bb1:                                              ; preds = %bb
    %tmp2 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
    %tmp3 = insertelement <4 x float> %tmp2, float %val0, i32 undef
+  call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp3) #0 ; Prevent block optimize out
    br label %bb7
  
  bb4:                                              ; preds = %bb
    %tmp5 = load volatile <4 x float>, <4 x float> addrspace(1)* undef
    %tmp6 = insertelement <4 x float> %tmp5, float %val0, i32 undef
+  call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp6) #0 ; Prevent block optimize out
    br label %bb7
  
  bb7:                                              ; preds = %bb4, %bb1
@@ -745,6 +749,8 @@ bb8:                                              ; preds = %bb2
  }
  
  declare i32 @llvm.amdgcn.workitem.id.x() #1
+declare void @llvm.amdgcn.s.barrier() #2
  
  attributes #0 = { nounwind }
  attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind convergent }
diff --git a/test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll b/test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll

index 8dbec18dbf2b8f086d2c1d425766b1db25593fc8..078d6330ce04e25d9931d9c769c88070a054d43e 100644 (file)
--- a/test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll
+++ b/test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll
@@ -1,8 +1,5 @@
  ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
  
-; FIXME: Enabling critical edge splitting will fix this.
-; XFAIL: *
-
  ; Make sure that m0 is not reinitialized in the loop.
  
  ; GCN-LABEL: {{^}}copy_local_to_global_loop_m0_init:
@@ -12,7 +9,9 @@
  ; GCN: s_mov_b32 m0, -1
  
  ; GCN: BB0_2:
+; GCN-NOT: m0
  ; GCN: ds_read_b32
+; GCN-NOT: m0
  ; GCN: buffer_store_dword
  
  ; GCN: s_cbranch_scc0 BB0_2
diff --git a/test/CodeGen/AMDGPU/sgpr-control-flow.ll b/test/CodeGen/AMDGPU/sgpr-control-flow.ll

index bb3f94914359597970bf09a5bcf191ba6701b0ec..d5d2f6b717f9600ae643469d2ba3d40c18fa037e 100644 (file)
--- a/test/CodeGen/AMDGPU/sgpr-control-flow.ll
+++ b/test/CodeGen/AMDGPU/sgpr-control-flow.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
  ;
  ;
  ; Most SALU instructions ignore control flow, so we need to make sure
@@ -9,13 +9,54 @@
  ; about instructions in different blocks overwriting each other.
  ; SI-LABEL: {{^}}sgpr_if_else_salu_br:
  ; SI: s_add
-; SI: s_add
+; SI: s_branch
+
+; SI: s_sub
  
  define void @sgpr_if_else_salu_br(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
  entry:
    %0 = icmp eq i32 %a, 0
    br i1 %0, label %if, label %else
  
+if:
+  %1 = sub i32 %b, %c
+  br label %endif
+
+else:
+  %2 = add i32 %d, %e
+  br label %endif
+
+endif:
+  %3 = phi i32 [%1, %if], [%2, %else]
+  %4 = add i32 %3, %a
+  store i32 %4, i32 addrspace(1)* %out
+  ret void
+}
+
+; SI-LABEL: {{^}}sgpr_if_else_salu_br_opt:
+; SI: s_cmp_lg_u32
+; SI: s_cbranch_scc0 [[IF:BB[0-9]+_[0-9]+]]
+
+; SI: ; BB#1: ; %else
+; SI: s_load_dword [[LOAD0:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xe
+; SI: s_load_dword [[LOAD1:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xf
+; SI-NOT: add
+; SI: s_branch [[ENDIF:BB[0-9]+_[0-9]+]]
+
+; SI: [[IF]]: ; %if
+; SI: s_load_dword [[LOAD0]], s{{\[[0-9]+:[0-9]+\]}}, 0xc
+; SI: s_load_dword [[LOAD1]], s{{\[[0-9]+:[0-9]+\]}}, 0xd
+; SI-NOT: add
+
+; SI: [[ENDIF]]: ; %endif
+; SI: s_add_i32 s{{[0-9]+}}, [[LOAD0]], [[LOAD1]]
+; SI: buffer_store_dword
+; SI-NEXT: s_endpgm
+define void @sgpr_if_else_salu_br_opt(i32 addrspace(1)* %out, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+entry:
+  %0 = icmp eq i32 %a, 0
+  br i1 %0, label %if, label %else
+
  if:
    %1 = add i32 %b, %c
    br label %endif
@@ -67,7 +108,7 @@ endif:
  ; SI: v_cmp_gt_i32_e32 [[CMP_IF:vcc]], 0, [[AVAL]]
  ; SI: v_cndmask_b32_e64 [[V_CMP:v[0-9]+]], 0, -1, [[CMP_IF]]
  
-; SI: BB2_2:
+; SI: BB{{[0-9]+}}_2:
  ; SI: buffer_load_dword [[AVAL:v[0-9]+]]
  ; SI: v_cmp_eq_u32_e32 [[CMP_ELSE:vcc]], 0, [[AVAL]]
  ; SI: v_cndmask_b32_e64 [[V_CMP]], 0, -1, [[CMP_ELSE]]
diff --git a/test/CodeGen/AMDGPU/sgpr-copy.ll b/test/CodeGen/AMDGPU/sgpr-copy.ll

index da270c533ece97bec242a53943637e0ba66c572d..e65f1e2da5706e8b47cf14d06d16ea0d0f17991a 100644 (file)
--- a/test/CodeGen/AMDGPU/sgpr-copy.ll
+++ b/test/CodeGen/AMDGPU/sgpr-copy.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck %s
-; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck %s
  
  ; This test checks that no VGPR to SGPR copies are created by the register
  ; allocator.
@@ -223,8 +223,15 @@ declare i32 @llvm.SI.packf16(float, float) #1
  ; an assertion failure.
  
  ; CHECK-LABEL: {{^}}sample_v3:
-; CHECK: image_sample
-; CHECK: image_sample
+; CHECK: v_mov_b32_e32 v[[SAMPLE_LO:[0-9]+]], 11
+; CHECK: v_mov_b32_e32 v[[SAMPLE_HI:[0-9]+]], 13
+; CHECK: s_branch
+
+; CHECK-DAG: v_mov_b32_e32 v[[SAMPLE_LO:[0-9]+]], 5
+; CHECK-DAG: v_mov_b32_e32 v[[SAMPLE_HI:[0-9]+]], 7
+
+; CHECK: BB{{[0-9]+_[0-9]+}}:
+; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[SAMPLE_LO]]:[[SAMPLE_HI]]{{\]}}
  ; CHECK: exp
  ; CHECK: s_endpgm
  define amdgpu_ps void @sample_v3([17 x <16 x i8>] addrspace(2)* byval %arg, [32 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <8 x i32>] addrspace(2)* byval %arg2, float inreg %arg3, i32 inreg %arg4, <2 x i32> %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <3 x i32> %arg8, <2 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, float %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, float %arg19, float %arg20) #0 {
@@ -241,14 +248,14 @@ entry:
    br i1 %tmp27, label %if, label %else
  
  if:                                               ; preds = %entry
-  %val.if = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> zeroinitializer, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %val.if = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 11, i32 13>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
    %val.if.0 = extractelement <4 x float> %val.if, i32 0
    %val.if.1 = extractelement <4 x float> %val.if, i32 1
    %val.if.2 = extractelement <4 x float> %val.if, i32 2
    br label %endif
  
  else:                                             ; preds = %entry
-  %val.else = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 1, i32 0>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+  %val.else = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> <i32 5, i32 7>, <8 x i32> %tmp24, <4 x i32> %tmp26.bc, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
    %val.else.0 = extractelement <4 x float> %val.else, i32 0
    %val.else.1 = extractelement <4 x float> %val.else, i32 1
    %val.else.2 = extractelement <4 x float> %val.else, i32 2
@@ -317,9 +324,15 @@ ENDIF69:                                          ; preds = %LOOP68
  
  ; This test checks that image_sample resource descriptors aren't loaded into
  ; vgprs.  The verifier will fail if this happens.
-; CHECK-LABEL:{{^}}sample_rsrc:
-; CHECK: image_sample
-; CHECK: image_sample
+; CHECK-LABEL:{{^}}sample_rsrc
+
+; CHECK: s_cmp_eq_u32
+; CHECK: s_cbranch_scc0 [[END:BB[0-9]+_[0-9]+]]
+
+; CHECK: v_add_i32_e32 v[[ADD:[0-9]+]], vcc, 1, v{{[0-9]+}}
+
+; [[END]]:
+; CHECK: image_sample v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+}}:[[ADD]]{{\]}}
  ; CHECK: s_endpgm
  define amdgpu_ps void @sample_rsrc([6 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [16 x <4 x i32>] addrspace(2)* byval %arg2, [32 x <8 x i32>] addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, float %arg18, i32 %arg19, float %arg20, float %arg21) #0 {
  bb:
diff --git a/test/CodeGen/AMDGPU/skip-if-dead.ll b/test/CodeGen/AMDGPU/skip-if-dead.ll

index d458faa818b8f6a538abe3a27305f80a6f63af26..33f5e98fcc7e8185271e99b59e6fbdef98fe9603 100644 (file)
--- a/test/CodeGen/AMDGPU/skip-if-dead.ll
+++ b/test/CodeGen/AMDGPU/skip-if-dead.ll
@@ -308,10 +308,8 @@ end:
  ; CHECK: s_mov_b64 exec, 0
  
  ; CHECK: [[SKIPKILL]]:
-; CHECK: v_cmp_nge_f32
-; CHECK-NEXT: s_cbranch_vccz [[UNREACHABLE:BB[0-9]+_[0-9]+]]
-
-; CHECK: [[UNREACHABLE]]:
+; CHECK: v_cmp_nge_f32_e32 vcc
+; CHECK-NEXT: BB#3: ; %bb5
  ; CHECK-NEXT: .Lfunc_end{{[0-9]+}}
  define amdgpu_ps void @no_skip_no_successors(float inreg %arg, float inreg %arg1) #0 {
  bb:
diff --git a/test/CodeGen/AMDGPU/uniform-cfg.ll b/test/CodeGen/AMDGPU/uniform-cfg.ll

index a5d1cd2281c54e3eaffae7d5ba22c6eb4e32fb2c..a0060bd368be0f6e09ea1f554514f8675e69de78 100644 (file)
--- a/test/CodeGen/AMDGPU/uniform-cfg.ll
+++ b/test/CodeGen/AMDGPU/uniform-cfg.ll
@@ -197,15 +197,15 @@ if.end:                                           ; preds = %if.else, %if.then
  ; GCN: s_cmp_lg_u32 s{{[0-9]+}}, 0
  ; GCN-NEXT: s_cbranch_scc0 [[IF_LABEL:[0-9_A-Za-z]+]]
  
-; GCN: v_mov_b32_e32 [[TWO:v[0-9]+]], 2
-; GCN: buffer_store_dword [[TWO]]
+; GCN: v_mov_b32_e32 [[IMM_REG:v[0-9]+]], 2
  ; GCN: s_branch [[ENDIF_LABEL:[0-9_A-Za-z]+]]
  
  ; GCN: [[IF_LABEL]]:
-; GCN: v_mov_b32_e32 [[ONE:v[0-9]+]], 1
-; GCN: buffer_store_dword [[ONE]]
+; GCN-NEXT: v_mov_b32_e32 [[IMM_REG]], 1
+
+; GCN-NEXT: [[ENDIF_LABEL]]:
+; GCN: buffer_store_dword [[IMM_REG]]
  
-; GCN: [[ENDIF_LABEL]]:
  ; GCN: v_mov_b32_e32 [[THREE:v[0-9]+]], 3
  ; GCN: buffer_store_dword [[THREE]]
  ; GCN: s_endpgm
author	Matt Arsenault <Matthew.Arsenault@amd.com>
	Tue, 6 Dec 2016 01:02:51 +0000 (01:02 +0000)
committer	Matt Arsenault <Matthew.Arsenault@amd.com>
	Tue, 6 Dec 2016 01:02:51 +0000 (01:02 +0000)
lib/Target/AMDGPU/AMDGPUTargetMachine.cpp		patch \| blob \| history
test/CodeGen/AMDGPU/basic-branch.ll		patch \| blob \| history
test/CodeGen/AMDGPU/br_cc.f16.ll		patch \| blob \| history
test/CodeGen/AMDGPU/branch-relaxation.ll		patch \| blob \| history
test/CodeGen/AMDGPU/indirect-addressing-si.ll		patch \| blob \| history
test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll		patch \| blob \| history
test/CodeGen/AMDGPU/sgpr-control-flow.ll		patch \| blob \| history
test/CodeGen/AMDGPU/sgpr-copy.ll		patch \| blob \| history
test/CodeGen/AMDGPU/skip-if-dead.ll		patch \| blob \| history
test/CodeGen/AMDGPU/uniform-cfg.ll		patch \| blob \| history