let OutOperandList = (outs);
let InOperandList = (ins variable_ops);
let AsmString = "BUNDLE";
- let hasSideEffects = 1;
+ let hasSideEffects = 0;
}
def LIFETIME_START : StandardPseudoInstruction {
let OutOperandList = (outs);
; GCN-LABEL: {{^}}call_void_func_byval_struct_kernel:
; GCN: s_mov_b32 s33, s7
-; GCN: s_add_u32 s32, s33, 0xc00{{$}}
+; GCN-NOT: s_add_u32 s32, s32, 0x800
-; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
-; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13
-; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8
+; GCN: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
+; GCN: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8
+; GCN: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13
; GCN: buffer_store_dword [[THIRTEEN]], off, s[0:3], s33 offset:24
; GCN-NOT: s_add_u32 s32, s32, 0x800
-
; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8
; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12
+; GCN-DAG: s_add_u32 s32, s33, 0xc00{{$}}
; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16
; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20
+; GCN: s_getpc_b64
+
; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
; Make sure the byval alignment is respected in the call frame setup
; GCN-LABEL: {{^}}call_void_func_byval_struct_align8_kernel:
; GCN: s_mov_b32 s33, s7
-; GCN: s_add_u32 s32, s33, 0xc00{{$}}
+; GCN-NOT: s_add_u32 s32, s32, 0x800
-; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
-; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13
-; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8
+; GCN: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
+; GCN: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8
+; GCN: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13
; GCN: buffer_store_dword [[THIRTEEN]], off, s[0:3], s33 offset:24
+
; GCN-NOT: s_add_u32 s32, s32, 0x800
-; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8
-; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12
-; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16
-; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20
+; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8
+; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12
+; GCN-DAG: s_add_u32 s32, s33, 0xc00{{$}}
+; GCN: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16
+; GCN: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20
+
+; GCN: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12
+; GCN: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
+; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4
+; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
-; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
-; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4
-; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
-; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12
; GCN-DAG: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s33 offset:24
; GCN-DAG: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s33 offset:28
; HSA: buffer_load_ubyte [[VAR:v[0-9]+]]
; HSA: s_mov_b32 s32, s33
+; MESA-DAG: buffer_load_ubyte [[VAR:v[0-9]+]]
+; MESA-DAG: s_mov_b32 s32, s33{{$}}
+
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1_signext@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1_signext@rel32@hi+4
-; MESA-DAG: buffer_load_ubyte [[VAR:v[0-9]+]]
-; MESA-DAG: s_mov_b32 s32, s33{{$}}
-
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1
; GCN-NEXT: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; HSA: buffer_load_ubyte v0
; HSA-DAG: s_mov_b32 s32, s33{{$}}
+; MESA: buffer_load_ubyte v0
+; MESA-DAG: s_mov_b32 s32, s33{{$}}
+
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1_zeroext@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1_zeroext@rel32@hi+4
-; MESA: buffer_load_ubyte v0
-; MESA-DAG: s_mov_b32 s32, s33{{$}}
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: v_and_b32_e32 v0, 1, v0
; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:16
; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:20
+
+; GCN: s_getpc_b64
+
; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:4
; GCN: buffer_store_dword v32, off, s[0:3], s32{{$}}
-; GCN: s_getpc_b64
; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
; GCN-NOT: s32
; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GCN: buffer_load_dword v32, off, s[0:3], s32{{$}}
; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:4
+; GCN: s_getpc_b64
; GCN: buffer_store_dword v32, off, s[0:3], s32{{$}}
; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:4
-; GCN: s_getpc_b64
; GCN: buffer_load_dword v33, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; GCN: buffer_load_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
; GCN-NOT: s32
; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s33:
; GCN: s_mov_b32 s33, s9
; GCN: s_mov_b32 s32, s33
-; GCN: #ASMSTART
-; GCN-NEXT: ; def s33
-; GCN-NEXT: #ASMEND
; GCN: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+4
+; GCN: #ASMSTART
+; GCN-NEXT: ; def s33
+; GCN-NEXT: #ASMEND
; GCN: s_swappc_b64 s[30:31], s[4:5]
; GCN: ;;#ASMSTART
; GCN-NEXT: ; use s33
; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s34:
; GCN: s_mov_b32 s33, s9
; GCN-NOT: s34
-; GCN: ;;#ASMSTART
-; GCN-NEXT: ; def s34
-; GCN-NEXT: ;;#ASMEND
-
; GCN-NOT: s34
; GCN: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, external_void_func_void@rel32@hi+4
+; GCN-NOT: s34
+; GCN: ;;#ASMSTART
+; GCN-NEXT: ; def s34
+; GCN-NEXT: ;;#ASMEND
+
; GCN-NOT: s34
; GCN: s_swappc_b64 s[30:31], s[4:5]
; GCN-LABEL: {{^}}test_call_void_func_void_preserves_v32:
; GCN: s_mov_b32 s33, s9
-; GCN: ;;#ASMSTART
-; GCN-NEXT: ; def v32
-; GCN-NEXT: ;;#ASMEND
-
; GCN-NOT: v32
; GCN: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
; GCN-NOT: v32
; GCN-DAG: s_mov_b32 s32, s33
+; GCN: ;;#ASMSTART
+; GCN-NEXT: ; def v32
+; GCN-NEXT: ;;#ASMEND
+
; GCN: s_swappc_b64 s[30:31], s[4:5]
; GCN-NOT: v32
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GCN-NEXT: s_mov_b32 s33, s9
; GCN-NEXT: s_add_u32 flat_scratch_lo, s6, s33
-; GCN-NEXT: s_addc_u32 flat_scratch_hi, s7, 0
; GCN-NEXT: v_mov_b32_e32 v2, 0
+; GCN-NEXT: s_addc_u32 flat_scratch_hi, s7, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: v_mov_b32_e32 v1, s5
+; GCN-NEXT: global_store_dword v[0:1], v2, off
+; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_getpc_b64 s[6:7]
; GCN-NEXT: s_add_u32 s6, s6, func@rel32@lo+4
; GCN-NEXT: s_addc_u32 s7, s7, func@rel32@hi+4
-; GCN-NEXT: global_store_dword v[0:1], v2, off
-; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_mov_b32 s32, s33
; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
; GCN-NEXT: s_endpgm
; GCN-LABEL: tail_call_memory_arg_load:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: ds_read_b32 v0, v0
; GCN-NEXT: s_getpc_b64 s[6:7]
; GCN-NEXT: s_add_u32 s6, s6, func@rel32@lo+4
; GCN-NEXT: s_addc_u32 s7, s7, func@rel32@hi+4
-; GCN-NEXT: ds_read_b32 v0, v0
; GCN-NEXT: s_setpc_b64 s[6:7]
%vgpr = load volatile i32, i32 addrspace(3)* %ptr
tail call void @func(i32 %vgpr)
; GCN: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1
; GCN-NEXT: buffer_store_dword v34, off, s[0:3], s5 offset:8
; GCN-NEXT: s_mov_b64 exec
+; GCN-DAG: s_getpc_b64
; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
; GCN: buffer_store_dword v33, off, s[0:3], s5 ; 4-byte Folded Spill
; GCN-DAG: v_writelane_b32 v34, s34, 0
; GCN-DAG: v_writelane_b32 v34, s35, 1
-; GCN-DAG: s_getpc_b64
; GCN: s_swappc_b64
-; GCN: s_getpc_b64 s[6:7]
-; GCN: s_add_u32 s6, s6, sibling_call_i32_fastcc_i32_i32@rel32@lo+4
-; GCN: s_addc_u32 s7, s7, sibling_call_i32_fastcc_i32_i32@rel32@hi+4
-
; GCN-DAG: v_readlane_b32 s34, v34, 0
; GCN-DAG: v_readlane_b32 s35, v34, 1
; GCN: buffer_load_dword v33, off, s[0:3], s5 ; 4-byte Folded Reload
; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
+; GCN: s_getpc_b64 s[6:7]
+; GCN: s_add_u32 s6, s6, sibling_call_i32_fastcc_i32_i32@rel32@lo+4
+; GCN: s_addc_u32 s7, s7, sibling_call_i32_fastcc_i32_i32@rel32@hi+4
; GCN: s_or_saveexec_b64 s{{\[[0-9]+:[0-9]+\]}}, -1
; GCN-NEXT: buffer_load_dword v34, off, s[0:3], s5 offset:8
; GCN-NEXT: s_mov_b64 exec
; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2
-; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
-
+; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2
-; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
-
+; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2
-; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
-
+; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2
-; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
-
+; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2
-; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
-
+; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2
-; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
-
+; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]
; CHECK: mrc p15, #0, [[TEB:r[0-9]]], c13, c0, #2
-; CHECK: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK: movw [[TLS_INDEX:r[0-9]]], :lower16:_tls_index
; CHECK-NEXT: movt [[TLS_INDEX]], :upper16:_tls_index
; CHECK-NEXT: ldr [[INDEX:r[0-9]]], {{\[}}[[TLS_INDEX]]]
-
+; CHECK-NEXT: ldr [[TLS_POINTER:r[0-9]]], {{\[}}[[TEB]], #44]
; CHECK-NEXT: ldr{{.w}} [[TLS:r[0-9]]], {{\[}}[[TLS_POINTER]], [[INDEX]], lsl #2]
; CHECK-NEXT: ldr [[SLOT:r[0-9]]], [[CPI:\.LCPI[0-9]+_[0-9]+]]