From: Hans Wennborg Date: Wed, 14 Aug 2019 12:59:17 +0000 (+0000) Subject: Merging r368300: X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d48ca225eda4c1faef3e1738c289af633f65ea87;p=llvm Merging r368300: ------------------------------------------------------------------------ r368300 | lenary | 2019-08-08 16:40:54 +0200 (Thu, 08 Aug 2019) | 18 lines [RISCV] Minimal stack realignment support Summary: Currently the RISC-V backend does not realign the stack. This can be an issue even for the RV32I/RV64I ABIs (where the stack is 16-byte aligned), though this is rare. It will be much more common with RV32E (though the alignment requirements for common data types remain under-documented...). This patch adds minimal support for stack realignment. It should cope with large realignments. It will error out if the stack needs realignment and variable sized objects are present. It feels like a lot of the code like getFrameIndexReference and determineFrameLayout could be refactored somehow, as right now it feels fiddly and brittle. We also seem to allocate a lot more memory than GCC does for equivalent C code. 
Reviewers: asb Reviewed By: asb Subscribers: wwei, jrtc27, s.egerton, MaskRay, Jim, lenary, hiraditya, rbar, johnrusso, simoncook, apazos, sabuasal, niosHD, kito-cheng, shiva0217, zzheng, edward-jones, rogfer01, MartinMosbeck, brucehoult, the_o, rkruppe, PkmX, jocewei, psnobl, benna, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62007 ------------------------------------------------------------------------ git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_90@368846 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/RISCV/RISCVFrameLowering.cpp b/lib/Target/RISCV/RISCVFrameLowering.cpp index 32c3b9684d2..bbaa16c0863 100644 --- a/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -40,8 +40,16 @@ void RISCVFrameLowering::determineFrameLayout(MachineFunction &MF) const { uint64_t FrameSize = MFI.getStackSize(); // Get the alignment. - uint64_t StackAlign = RI->needsStackRealignment(MF) ? MFI.getMaxAlignment() - : getStackAlignment(); + unsigned StackAlign = getStackAlignment(); + if (RI->needsStackRealignment(MF)) { + unsigned MaxStackAlign = std::max(StackAlign, MFI.getMaxAlignment()); + FrameSize += (MaxStackAlign - StackAlign); + StackAlign = MaxStackAlign; + } + + // Set Max Call Frame Size + uint64_t MaxCallSize = alignTo(MFI.getMaxCallFrameSize(), StackAlign); + MFI.setMaxCallFrameSize(MaxCallSize); // Make sure the frame is aligned. 
FrameSize = alignTo(FrameSize, StackAlign); @@ -101,6 +109,12 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, const RISCVInstrInfo *TII = STI.getInstrInfo(); MachineBasicBlock::iterator MBBI = MBB.begin(); + if (RI->needsStackRealignment(MF) && MFI.hasVarSizedObjects()) { + report_fatal_error( + "RISC-V backend can't currently handle functions that need stack " + "realignment and have variable sized objects"); + } + unsigned FPReg = getFPReg(STI); unsigned SPReg = getSPReg(STI); @@ -158,6 +172,29 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF, nullptr, RI->getDwarfRegNum(FPReg, true), 0)); BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex); + + // Realign Stack + const RISCVRegisterInfo *RI = STI.getRegisterInfo(); + if (RI->needsStackRealignment(MF)) { + unsigned MaxAlignment = MFI.getMaxAlignment(); + + const RISCVInstrInfo *TII = STI.getInstrInfo(); + if (isInt<12>(-(int)MaxAlignment)) { + BuildMI(MBB, MBBI, DL, TII->get(RISCV::ANDI), SPReg) + .addReg(SPReg) + .addImm(-(int)MaxAlignment); + } else { + unsigned ShiftAmount = countTrailingZeros(MaxAlignment); + unsigned VR = + MF.getRegInfo().createVirtualRegister(&RISCV::GPRRegClass); + BuildMI(MBB, MBBI, DL, TII->get(RISCV::SRLI), VR) + .addReg(SPReg) + .addImm(ShiftAmount); + BuildMI(MBB, MBBI, DL, TII->get(RISCV::SLLI), SPReg) + .addReg(VR) + .addImm(ShiftAmount); + } + } } } @@ -257,6 +294,13 @@ int RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, if (FI >= MinCSFI && FI <= MaxCSFI) { FrameReg = RISCV::X2; Offset += MF.getFrameInfo().getStackSize(); + } else if (RI->needsStackRealignment(MF)) { + assert(!MFI.hasVarSizedObjects() && + "Unexpected combination of stack realignment and varsized objects"); + // If the stack was realigned, the frame pointer is set in order to allow + // SP to be restored, but we still access stack objects using SP. 
+ FrameReg = RISCV::X2; + Offset += MF.getFrameInfo().getStackSize(); } else { FrameReg = RI->getFrameRegister(MF); if (hasFP(MF)) diff --git a/test/CodeGen/RISCV/stack-realignment-unsupported.ll b/test/CodeGen/RISCV/stack-realignment-unsupported.ll new file mode 100644 index 00000000000..f2f11b073cf --- /dev/null +++ b/test/CodeGen/RISCV/stack-realignment-unsupported.ll @@ -0,0 +1,13 @@ +; RUN: not llc -mtriple=riscv32 < %s 2>&1 | FileCheck %s +; RUN: not llc -mtriple=riscv64 < %s 2>&1 | FileCheck %s + +; CHECK: LLVM ERROR: RISC-V backend can't currently handle functions that need stack realignment and have variable sized objects + +declare void @callee(i8*, i32*) + +define void @caller(i32 %n) nounwind { + %1 = alloca i8, i32 %n + %2 = alloca i32, align 64 + call void @callee(i8* %1, i32 *%2) + ret void +} diff --git a/test/CodeGen/RISCV/stack-realignment.ll b/test/CodeGen/RISCV/stack-realignment.ll new file mode 100644 index 00000000000..252a099d098 --- /dev/null +++ b/test/CodeGen/RISCV/stack-realignment.ll @@ -0,0 +1,627 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I + +declare void @callee(i8*) + +define void @caller32() nounwind { +; RV32I-LABEL: caller32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -64 +; RV32I-NEXT: sw ra, 60(sp) +; RV32I-NEXT: sw s0, 56(sp) +; RV32I-NEXT: addi s0, sp, 64 +; RV32I-NEXT: andi sp, sp, -32 +; RV32I-NEXT: addi a0, sp, 32 +; RV32I-NEXT: call callee +; RV32I-NEXT: addi sp, s0, -64 +; RV32I-NEXT: lw s0, 56(sp) +; RV32I-NEXT: lw ra, 60(sp) +; RV32I-NEXT: addi sp, sp, 64 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -64 +; RV64I-NEXT: sd ra, 56(sp) +; RV64I-NEXT: sd s0, 48(sp) +; RV64I-NEXT: addi s0, sp, 64 +; RV64I-NEXT: andi sp, sp, -32 +; 
RV64I-NEXT: addi a0, sp, 32 +; RV64I-NEXT: call callee +; RV64I-NEXT: addi sp, s0, -64 +; RV64I-NEXT: ld s0, 48(sp) +; RV64I-NEXT: ld ra, 56(sp) +; RV64I-NEXT: addi sp, sp, 64 +; RV64I-NEXT: ret + %1 = alloca i8, align 32 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign32() nounwind "no-realign-stack" { +; RV32I-LABEL: caller_no_realign32: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call callee +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller_no_realign32: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call callee +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = alloca i8, align 32 + call void @callee(i8* %1) + ret void +} + +define void @caller64() nounwind { +; RV32I-LABEL: caller64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -128 +; RV32I-NEXT: sw ra, 124(sp) +; RV32I-NEXT: sw s0, 120(sp) +; RV32I-NEXT: addi s0, sp, 128 +; RV32I-NEXT: andi sp, sp, -64 +; RV32I-NEXT: addi a0, sp, 64 +; RV32I-NEXT: call callee +; RV32I-NEXT: addi sp, s0, -128 +; RV32I-NEXT: lw s0, 120(sp) +; RV32I-NEXT: lw ra, 124(sp) +; RV32I-NEXT: addi sp, sp, 128 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -128 +; RV64I-NEXT: sd ra, 120(sp) +; RV64I-NEXT: sd s0, 112(sp) +; RV64I-NEXT: addi s0, sp, 128 +; RV64I-NEXT: andi sp, sp, -64 +; RV64I-NEXT: addi a0, sp, 64 +; RV64I-NEXT: call callee +; RV64I-NEXT: addi sp, s0, -128 +; RV64I-NEXT: ld s0, 112(sp) +; RV64I-NEXT: ld ra, 120(sp) +; RV64I-NEXT: addi sp, sp, 128 +; RV64I-NEXT: ret + %1 = alloca i8, align 64 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign64() nounwind "no-realign-stack" { +; RV32I-LABEL: caller_no_realign64: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 
12(sp) +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call callee +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller_no_realign64: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call callee +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = alloca i8, align 64 + call void @callee(i8* %1) + ret void +} + +define void @caller128() nounwind { +; RV32I-LABEL: caller128: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -256 +; RV32I-NEXT: sw ra, 252(sp) +; RV32I-NEXT: sw s0, 248(sp) +; RV32I-NEXT: addi s0, sp, 256 +; RV32I-NEXT: andi sp, sp, -128 +; RV32I-NEXT: addi a0, sp, 128 +; RV32I-NEXT: call callee +; RV32I-NEXT: addi sp, s0, -256 +; RV32I-NEXT: lw s0, 248(sp) +; RV32I-NEXT: lw ra, 252(sp) +; RV32I-NEXT: addi sp, sp, 256 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller128: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -256 +; RV64I-NEXT: sd ra, 248(sp) +; RV64I-NEXT: sd s0, 240(sp) +; RV64I-NEXT: addi s0, sp, 256 +; RV64I-NEXT: andi sp, sp, -128 +; RV64I-NEXT: addi a0, sp, 128 +; RV64I-NEXT: call callee +; RV64I-NEXT: addi sp, s0, -256 +; RV64I-NEXT: ld s0, 240(sp) +; RV64I-NEXT: ld ra, 248(sp) +; RV64I-NEXT: addi sp, sp, 256 +; RV64I-NEXT: ret + %1 = alloca i8, align 128 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign128() nounwind "no-realign-stack" { +; RV32I-LABEL: caller_no_realign128: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call callee +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller_no_realign128: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call callee +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = alloca i8, align 128 + call void 
@callee(i8* %1) + ret void +} + +define void @caller256() nounwind { +; RV32I-LABEL: caller256: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -512 +; RV32I-NEXT: sw ra, 508(sp) +; RV32I-NEXT: sw s0, 504(sp) +; RV32I-NEXT: addi s0, sp, 512 +; RV32I-NEXT: andi sp, sp, -256 +; RV32I-NEXT: addi a0, sp, 256 +; RV32I-NEXT: call callee +; RV32I-NEXT: addi sp, s0, -512 +; RV32I-NEXT: lw s0, 504(sp) +; RV32I-NEXT: lw ra, 508(sp) +; RV32I-NEXT: addi sp, sp, 512 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller256: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -512 +; RV64I-NEXT: sd ra, 504(sp) +; RV64I-NEXT: sd s0, 496(sp) +; RV64I-NEXT: addi s0, sp, 512 +; RV64I-NEXT: andi sp, sp, -256 +; RV64I-NEXT: addi a0, sp, 256 +; RV64I-NEXT: call callee +; RV64I-NEXT: addi sp, s0, -512 +; RV64I-NEXT: ld s0, 496(sp) +; RV64I-NEXT: ld ra, 504(sp) +; RV64I-NEXT: addi sp, sp, 512 +; RV64I-NEXT: ret + %1 = alloca i8, align 256 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign256() nounwind "no-realign-stack" { +; RV32I-LABEL: caller_no_realign256: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call callee +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller_no_realign256: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call callee +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = alloca i8, align 256 + call void @callee(i8* %1) + ret void +} + +define void @caller512() nounwind { +; RV32I-LABEL: caller512: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -1536 +; RV32I-NEXT: sw ra, 1532(sp) +; RV32I-NEXT: sw s0, 1528(sp) +; RV32I-NEXT: addi s0, sp, 1536 +; RV32I-NEXT: andi sp, sp, -512 +; RV32I-NEXT: addi a0, sp, 1024 +; RV32I-NEXT: call callee +; RV32I-NEXT: addi sp, s0, -1536 +; RV32I-NEXT: lw s0, 1528(sp) +; RV32I-NEXT: lw ra, 1532(sp) +; 
RV32I-NEXT: addi sp, sp, 1536 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller512: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -1536 +; RV64I-NEXT: sd ra, 1528(sp) +; RV64I-NEXT: sd s0, 1520(sp) +; RV64I-NEXT: addi s0, sp, 1536 +; RV64I-NEXT: andi sp, sp, -512 +; RV64I-NEXT: addi a0, sp, 1024 +; RV64I-NEXT: call callee +; RV64I-NEXT: addi sp, s0, -1536 +; RV64I-NEXT: ld s0, 1520(sp) +; RV64I-NEXT: ld ra, 1528(sp) +; RV64I-NEXT: addi sp, sp, 1536 +; RV64I-NEXT: ret + %1 = alloca i8, align 512 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign512() nounwind "no-realign-stack" { +; RV32I-LABEL: caller_no_realign512: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call callee +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller_no_realign512: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call callee +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = alloca i8, align 512 + call void @callee(i8* %1) + ret void +} + +define void @caller1024() nounwind { +; RV32I-LABEL: caller1024: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, -1024 +; RV32I-NEXT: sub sp, sp, a0 +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, -1028 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: sw ra, 0(a0) +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, -1032 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: sw s0, 0(a0) +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, -1024 +; RV32I-NEXT: add s0, sp, a0 +; RV32I-NEXT: andi sp, sp, -1024 +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, -2048 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: mv a0, a0 +; RV32I-NEXT: call callee +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, -1024 +; RV32I-NEXT: sub sp, s0, a0 +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, -1032 
+; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: lw s0, 0(a0) +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, -1028 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: lw ra, 0(a0) +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, -1024 +; RV32I-NEXT: add sp, sp, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller1024: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, -1024 +; RV64I-NEXT: sub sp, sp, a0 +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, -1032 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: sd ra, 0(a0) +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, -1040 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: sd s0, 0(a0) +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, -1024 +; RV64I-NEXT: add s0, sp, a0 +; RV64I-NEXT: andi sp, sp, -1024 +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, -2048 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: mv a0, a0 +; RV64I-NEXT: call callee +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, -1024 +; RV64I-NEXT: sub sp, s0, a0 +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, -1040 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: ld s0, 0(a0) +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, -1032 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: ld ra, 0(a0) +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, -1024 +; RV64I-NEXT: add sp, sp, a0 +; RV64I-NEXT: ret + %1 = alloca i8, align 1024 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign1024() nounwind "no-realign-stack" { +; RV32I-LABEL: caller_no_realign1024: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call callee +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller_no_realign1024: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call callee +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = 
alloca i8, align 1024 + call void @callee(i8* %1) + ret void +} + +define void @caller2048() nounwind { +; RV32I-LABEL: caller2048: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a0, 2 +; RV32I-NEXT: addi a0, a0, -2048 +; RV32I-NEXT: sub sp, sp, a0 +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, 2044 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: sw ra, 0(a0) +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, 2040 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: sw s0, 0(a0) +; RV32I-NEXT: lui a0, 2 +; RV32I-NEXT: addi a0, a0, -2048 +; RV32I-NEXT: add s0, sp, a0 +; RV32I-NEXT: andi sp, sp, -2048 +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: mv a0, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: mv a0, a0 +; RV32I-NEXT: call callee +; RV32I-NEXT: lui a0, 2 +; RV32I-NEXT: addi a0, a0, -2048 +; RV32I-NEXT: sub sp, s0, a0 +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, 2040 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: lw s0, 0(a0) +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: addi a0, a0, 2044 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: lw ra, 0(a0) +; RV32I-NEXT: lui a0, 2 +; RV32I-NEXT: addi a0, a0, -2048 +; RV32I-NEXT: add sp, sp, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller2048: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a0, 2 +; RV64I-NEXT: addi a0, a0, -2048 +; RV64I-NEXT: sub sp, sp, a0 +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, 2040 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: sd ra, 0(a0) +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, 2032 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: sd s0, 0(a0) +; RV64I-NEXT: lui a0, 2 +; RV64I-NEXT: addi a0, a0, -2048 +; RV64I-NEXT: add s0, sp, a0 +; RV64I-NEXT: andi sp, sp, -2048 +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: mv a0, a0 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: mv a0, a0 +; RV64I-NEXT: call callee +; RV64I-NEXT: lui a0, 2 +; RV64I-NEXT: addi a0, a0, -2048 +; RV64I-NEXT: sub sp, s0, a0 +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, 2032 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: ld s0, 0(a0) +; 
RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: addi a0, a0, 2040 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: ld ra, 0(a0) +; RV64I-NEXT: lui a0, 2 +; RV64I-NEXT: addi a0, a0, -2048 +; RV64I-NEXT: add sp, sp, a0 +; RV64I-NEXT: ret + %1 = alloca i8, align 2048 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign2048() nounwind "no-realign-stack" { +; RV32I-LABEL: caller_no_realign2048: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call callee +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller_no_realign2048: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call callee +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = alloca i8, align 2048 + call void @callee(i8* %1) + ret void +} + +define void @caller4096() nounwind { +; RV32I-LABEL: caller4096: +; RV32I: # %bb.0: +; RV32I-NEXT: lui a0, 3 +; RV32I-NEXT: mv a0, a0 +; RV32I-NEXT: sub sp, sp, a0 +; RV32I-NEXT: lui a0, 3 +; RV32I-NEXT: addi a0, a0, -4 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: sw ra, 0(a0) +; RV32I-NEXT: lui a0, 3 +; RV32I-NEXT: addi a0, a0, -8 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: sw s0, 0(a0) +; RV32I-NEXT: lui a0, 3 +; RV32I-NEXT: mv a0, a0 +; RV32I-NEXT: add s0, sp, a0 +; RV32I-NEXT: srli a0, sp, 12 +; RV32I-NEXT: slli sp, a0, 12 +; RV32I-NEXT: lui a0, 2 +; RV32I-NEXT: mv a0, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: mv a0, a0 +; RV32I-NEXT: call callee +; RV32I-NEXT: lui a0, 3 +; RV32I-NEXT: mv a0, a0 +; RV32I-NEXT: sub sp, s0, a0 +; RV32I-NEXT: lui a0, 3 +; RV32I-NEXT: addi a0, a0, -8 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: lw s0, 0(a0) +; RV32I-NEXT: lui a0, 3 +; RV32I-NEXT: addi a0, a0, -4 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: lw ra, 0(a0) +; RV32I-NEXT: lui a0, 3 +; RV32I-NEXT: mv a0, a0 +; RV32I-NEXT: add sp, sp, a0 +; 
RV32I-NEXT: ret +; +; RV64I-LABEL: caller4096: +; RV64I: # %bb.0: +; RV64I-NEXT: lui a0, 3 +; RV64I-NEXT: mv a0, a0 +; RV64I-NEXT: sub sp, sp, a0 +; RV64I-NEXT: lui a0, 3 +; RV64I-NEXT: addi a0, a0, -8 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: sd ra, 0(a0) +; RV64I-NEXT: lui a0, 3 +; RV64I-NEXT: addi a0, a0, -16 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: sd s0, 0(a0) +; RV64I-NEXT: lui a0, 3 +; RV64I-NEXT: mv a0, a0 +; RV64I-NEXT: add s0, sp, a0 +; RV64I-NEXT: srli a0, sp, 12 +; RV64I-NEXT: slli sp, a0, 12 +; RV64I-NEXT: lui a0, 2 +; RV64I-NEXT: mv a0, a0 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: mv a0, a0 +; RV64I-NEXT: call callee +; RV64I-NEXT: lui a0, 3 +; RV64I-NEXT: mv a0, a0 +; RV64I-NEXT: sub sp, s0, a0 +; RV64I-NEXT: lui a0, 3 +; RV64I-NEXT: addi a0, a0, -16 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: ld s0, 0(a0) +; RV64I-NEXT: lui a0, 3 +; RV64I-NEXT: addi a0, a0, -8 +; RV64I-NEXT: add a0, sp, a0 +; RV64I-NEXT: ld ra, 0(a0) +; RV64I-NEXT: lui a0, 3 +; RV64I-NEXT: mv a0, a0 +; RV64I-NEXT: add sp, sp, a0 +; RV64I-NEXT: ret + %1 = alloca i8, align 4096 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign4096() nounwind "no-realign-stack" { +; RV32I-LABEL: caller_no_realign4096: +; RV32I: # %bb.0: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw ra, 12(sp) +; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: call callee +; RV32I-NEXT: lw ra, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: caller_no_realign4096: +; RV64I: # %bb.0: +; RV64I-NEXT: addi sp, sp, -16 +; RV64I-NEXT: sd ra, 8(sp) +; RV64I-NEXT: mv a0, sp +; RV64I-NEXT: call callee +; RV64I-NEXT: ld ra, 8(sp) +; RV64I-NEXT: addi sp, sp, 16 +; RV64I-NEXT: ret + %1 = alloca i8, align 4096 + call void @callee(i8* %1) + ret void +}