// Adjust the stack pointer for the new arguments...
// These operations are automatically eliminated by the prolog/epilog pass
if (!IsSibCall) {
- Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
+ Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
unsigned OffsetReg = Info->getScratchWaveOffsetReg();
InFlag = Chain.getValue(1);
}
- uint64_t CalleePopBytes = 0;
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getTargetConstant(NumBytes, DL, MVT::i32),
+ uint64_t CalleePopBytes = NumBytes;
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getTargetConstant(0, DL, MVT::i32),
DAG.getTargetConstant(CalleePopBytes, DL, MVT::i32),
InFlag, DL);
if (!Ins.empty())
; GCN-DAG: s_add_u32 s32, s32, 0xc00{{$}}
; GCN-DAG: v_writelane_b32
-; GCN-DAG: s_add_u32 s32, s32, 0x800{{$}}
; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13
; GCN: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16
; GCN: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:20
+; GCN-NOT: s_add_u32 s32, s32, 0x800
; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}}
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:8
; GCN: v_readlane_b32
; GCN-NOT: v_readlane_b32 s32
-; GCN: s_sub_u32 s32, s32, 0x800{{$}}
-; GCN-NEXT: s_sub_u32 s32, s32, 0xc00{{$}}
+; GCN-NOT: s_sub_u32 s32, s32, 0x800
+
+; GCN: s_sub_u32 s32, s32, 0xc00{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @call_void_func_byval_struct_func() #0 {
; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s33 offset:8
; GCN: buffer_store_dword [[THIRTEEN]], off, s[0:3], s33 offset:24
-; GCN-DAG: s_add_u32 s32, s32, 0x800{{$}}
+; GCN-NOT: s_add_u32 s32, s32, 0x800
; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s33 offset:8
; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s33 offset:12
; GCN: s_swappc_b64
-; FIXME: Dead SP modfication
-; GCN-NEXT: s_sub_u32 s32, s32, 0x800{{$}}
-; GCN-NEXT: s_endpgm
+; GCN-NOT: s_sub_u32 s32
+; GCN: s_endpgm
define amdgpu_kernel void @call_void_func_byval_struct_kernel() #0 {
entry:
%arg0 = alloca %struct.ByValStruct, align 4
; GCN-LABEL: {{^}}test_call_external_void_func_v32i32_i32:
; HSA-DAG: s_mov_b32 s33, s9
-; HSA-DAG: s_add_u32 [[SP_REG:s[0-9]+]], s33, 0x100{{$}}
+; HSA-NOT: s_add_u32 s32
; MESA-DAG: s_mov_b32 s33, s3{{$}}
-; MESA-DAG: s_add_u32 [[SP_REG:s[0-9]+]], s33, 0x100{{$}}
+; MESA-NOT: s_add_u32 s32
; GCN-DAG: buffer_load_dword [[VAL1:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[24:27], off
; GCN-DAG: buffer_load_dwordx4 v[28:31], off
-; GCN: buffer_store_dword [[VAL1]], off, s[{{[0-9]+}}:{{[0-9]+}}], [[SP_REG]] offset:4{{$}}
+; GCN: buffer_store_dword [[VAL1]], off, s[{{[0-9]+}}:{{[0-9]+}}], s32 offset:4{{$}}
; GCN: s_waitcnt
; GCN-NEXT: s_swappc_b64
; GCN-NEXT: s_endpgm
; HSA-DAG: buffer_store_byte [[VAL0]], off, s[0:3], s33 offset:8
; HSA-DAG: buffer_store_dword [[VAL1]], off, s[0:3], s33 offset:12
-; GCN: s_add_u32 [[SP]], [[SP]], 0x200
+; GCN-NOT: s_add_u32 [[SP]],
; HSA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[0:3], s33 offset:8
; HSA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[0:3], s33 offset:12
; MESA: buffer_store_dword [[RELOAD_VAL1]], off, s[36:39], [[SP]] offset:8
; GCN-NEXT: s_swappc_b64
-; GCN-NEXT: s_sub_u32 [[SP]], [[SP]], 0x200
+; GCN-NOT: [[SP]]
define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 {
%val = alloca { i8, i32 }, align 4
%gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 }* %val, i32 0, i32 0
; GCN-DAG: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:8
; GCN-DAG: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:12
-; GCN-DAG: s_add_u32 [[SP]], [[SP]], 0x200
+; GCN-NOT: s_add_u32 [[SP]]
; GCN: buffer_store_dword [[RELOAD_VAL0]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:4
; GCN: buffer_store_dword [[RELOAD_VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:8
; GCN-NEXT: s_swappc_b64
; GCN-DAG: buffer_load_ubyte [[LOAD_OUT_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:16
; GCN-DAG: buffer_load_dword [[LOAD_OUT_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, [[FP_REG]] offset:20
-; GCN: s_sub_u32 [[SP]], [[SP]], 0x200
+; GCN-NOT: s_sub_u32 [[SP]]
; GCN: buffer_store_byte [[LOAD_OUT_VAL0]], off
; GCN: buffer_store_dword [[LOAD_OUT_VAL1]], off
; GCN: s_mov_b32 s33, s7
; GCN: s_add_u32 s32, s33, 0x200{{$}}
-; GCN-DAG: s_add_u32 s32, s32, 0x100{{$}}
+; GCN-NOT: s32
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
; GCN: buffer_store_dword [[K]], off, s[0:3], s33 offset:4
; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:12