if (StackSize == 0 && !MFI.adjustsStack())
return;
+ uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
+ // Split the SP adjustment to reduce the offsets of callee saved spill.
+ if (FirstSPAdjustAmount)
+ StackSize = FirstSPAdjustAmount;
+
// Allocate space on the stack if necessary.
adjustReg(MBB, MBBI, DL, SPReg, SPReg, -StackSize, MachineInstr::FrameSetup);
nullptr, RI->getDwarfRegNum(FPReg, true), 0));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
+ }
+ // Emit the second SP adjustment after saving callee saved registers.
+ if (FirstSPAdjustAmount) {
+ uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount;
+ assert(SecondSPAdjustAmount > 0 &&
+ "SecondSPAdjustAmount should be greater than zero");
+ adjustReg(MBB, MBBI, DL, SPReg, SPReg, -SecondSPAdjustAmount,
+ MachineInstr::FrameSetup);
+ // Emit ".cfi_def_cfa_offset StackSize"
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
+ BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
+
+ if (hasFP(MF)) {
// Realign Stack
const RISCVRegisterInfo *RI = STI.getRegisterInfo();
if (RI->needsStackRealignment(MF)) {
MachineInstr::FrameDestroy);
}
+ uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
+ if (FirstSPAdjustAmount) {
+ uint64_t SecondSPAdjustAmount = MFI.getStackSize() - FirstSPAdjustAmount;
+ assert(SecondSPAdjustAmount > 0 &&
+ "SecondSPAdjustAmount should be greater than zero");
+
+ adjustReg(MBB, LastFrameDestroy, DL, SPReg, SPReg, SecondSPAdjustAmount,
+ MachineInstr::FrameDestroy);
+
+ // Emit ".cfi_def_cfa_offset FirstSPAdjustAmount"
+ unsigned CFIIndex =
+ MF.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(nullptr,
+ -FirstSPAdjustAmount));
+ BuildMI(MBB, LastFrameDestroy, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex);
+ }
+
if (hasFP(MF)) {
// To find the instruction restoring FP from stack.
for (auto &I = LastFrameDestroy; I != MBBI; ++I) {
.addCFIIndex(CFIIndex);
}
+ if (FirstSPAdjustAmount)
+ StackSize = FirstSPAdjustAmount;
+
// Deallocate stack
adjustReg(MBB, MBBI, DL, SPReg, SPReg, StackSize, MachineInstr::FrameDestroy);
int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea() +
MFI.getOffsetAdjustment();
+ uint64_t FirstSPAdjustAmount = getFirstSPAdjustAmount(MF);
+
if (CSI.size()) {
MinCSFI = CSI[0].getFrameIdx();
MaxCSFI = CSI[CSI.size() - 1].getFrameIdx();
if (FI >= MinCSFI && FI <= MaxCSFI) {
FrameReg = RISCV::X2;
- Offset += MF.getFrameInfo().getStackSize();
+
+ if (FirstSPAdjustAmount)
+ Offset += FirstSPAdjustAmount;
+ else
+ Offset += MF.getFrameInfo().getStackSize();
} else if (RI->needsStackRealignment(MF)) {
assert(!MFI.hasVarSizedObjects() &&
"Unexpected combination of stack realignment and varsized objects");
return MBB.erase(MI);
}
+
+// Returns the first SP adjustment amount (0 if the adjustment is not split).
+// Splitting the adjustment shortens the prologue/epilogue as shown below:
+// the offset of each callee saved register then fits in a single store.
+// add sp,sp,-2032
+// sw ra,2028(sp)
+// sw s0,2024(sp)
+// sw s1,2020(sp)
+// sw s3,2012(sp)
+// sw s4,2008(sp)
+// add sp,sp,-64
+uint64_t
+RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
+ uint64_t StackSize = MFI.getStackSize();
+ uint64_t StackAlign = getStackAlignment();
+
+ // FIXME: Disable SplitSPAdjust when the save-restore libcalls are enabled,
+ // once that patch lands. The callee saved registers will be pushed by the
+ // save-restore libcalls, so we don't have to split the SP adjustment
+ // in this case.
+ //
+ // Return the FirstSPAdjustAmount if the StackSize does not fit in a signed
+ // 12-bit immediate and there is a callee saved register that needs pushing.
+ if (!isInt<12>(StackSize) && (CSI.size() > 0)) {
+ // FirstSPAdjustAmount is chosen as (2048 - StackAlign) because an
+ // adjustment of 2048 would make sp = sp + 2048 in the epilogue split into
+ // multiple instructions. Offsets smaller than 2048 fit in a single
+ // load/store instruction, and we have to stick with the stack alignment.
+ // 2048 is 16-byte aligned. The stack alignment for RV32 and RV64 is 16,
+ // for RV32E is 4. So (2048 - StackAlign) will satisfy the stack alignment.
+ return 2048 - StackAlign;
+ }
+ return 0;
+}
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
+ // Get the first stack adjustment amount for SplitSPAdjust.
+ // Return 0 if we don't want to split the SP adjustment in prologue and
+ // epilogue.
+ uint64_t getFirstSPAdjustAmount(const MachineFunction &MF) const;
+
protected:
const RISCVSubtarget &STI;
; TODO: the quality of the generated code is poor
-define void @test() nounwind {
+define void @test() {
; RV32I-FPELIM-LABEL: test:
; RV32I-FPELIM: # %bb.0:
; RV32I-FPELIM-NEXT: lui a0, 74565
; RV32I-FPELIM-NEXT: addi a0, a0, 1664
; RV32I-FPELIM-NEXT: sub sp, sp, a0
+; RV32I-FPELIM-NEXT: .cfi_def_cfa_offset 305419904
; RV32I-FPELIM-NEXT: lui a0, 74565
; RV32I-FPELIM-NEXT: addi a0, a0, 1664
; RV32I-FPELIM-NEXT: add sp, sp, a0
+; RV32I-FPELIM-NEXT: .cfi_def_cfa_offset 0
; RV32I-FPELIM-NEXT: ret
;
; RV32I-WITHFP-LABEL: test:
; RV32I-WITHFP: # %bb.0:
+; RV32I-WITHFP-NEXT: addi sp, sp, -2032
+; RV32I-WITHFP-NEXT: .cfi_def_cfa_offset 2032
+; RV32I-WITHFP-NEXT: sw ra, 2028(sp)
+; RV32I-WITHFP-NEXT: sw s0, 2024(sp)
+; RV32I-WITHFP-NEXT: .cfi_offset ra, -4
+; RV32I-WITHFP-NEXT: .cfi_offset s0, -8
+; RV32I-WITHFP-NEXT: addi s0, sp, 2032
+; RV32I-WITHFP-NEXT: .cfi_def_cfa s0, 0
; RV32I-WITHFP-NEXT: lui a0, 74565
-; RV32I-WITHFP-NEXT: addi a0, a0, 1680
+; RV32I-WITHFP-NEXT: addi a0, a0, -352
; RV32I-WITHFP-NEXT: sub sp, sp, a0
+; RV32I-WITHFP-NEXT: .cfi_def_cfa_offset 305419920
; RV32I-WITHFP-NEXT: lui a0, 74565
-; RV32I-WITHFP-NEXT: addi a0, a0, 1676
-; RV32I-WITHFP-NEXT: add a0, sp, a0
-; RV32I-WITHFP-NEXT: sw ra, 0(a0)
-; RV32I-WITHFP-NEXT: lui a0, 74565
-; RV32I-WITHFP-NEXT: addi a0, a0, 1672
-; RV32I-WITHFP-NEXT: add a0, sp, a0
-; RV32I-WITHFP-NEXT: sw s0, 0(a0)
-; RV32I-WITHFP-NEXT: lui a0, 74565
-; RV32I-WITHFP-NEXT: addi a0, a0, 1680
-; RV32I-WITHFP-NEXT: add s0, sp, a0
-; RV32I-WITHFP-NEXT: lui a0, 74565
-; RV32I-WITHFP-NEXT: addi a0, a0, 1672
-; RV32I-WITHFP-NEXT: add a0, sp, a0
-; RV32I-WITHFP-NEXT: lw s0, 0(a0)
-; RV32I-WITHFP-NEXT: lui a0, 74565
-; RV32I-WITHFP-NEXT: addi a0, a0, 1676
-; RV32I-WITHFP-NEXT: add a0, sp, a0
-; RV32I-WITHFP-NEXT: lw ra, 0(a0)
-; RV32I-WITHFP-NEXT: lui a0, 74565
-; RV32I-WITHFP-NEXT: addi a0, a0, 1680
+; RV32I-WITHFP-NEXT: addi a0, a0, -352
; RV32I-WITHFP-NEXT: add sp, sp, a0
+; RV32I-WITHFP-NEXT: .cfi_def_cfa_offset 2032
+; RV32I-WITHFP-NEXT: lw s0, 2024(sp)
+; RV32I-WITHFP-NEXT: .cfi_def_cfa sp, 305419920
+; RV32I-WITHFP-NEXT: lw ra, 2028(sp)
+; RV32I-WITHFP-NEXT: .cfi_restore ra
+; RV32I-WITHFP-NEXT: .cfi_restore s0
+; RV32I-WITHFP-NEXT: addi sp, sp, 2032
+; RV32I-WITHFP-NEXT: .cfi_def_cfa_offset 0
; RV32I-WITHFP-NEXT: ret
%tmp = alloca [ 305419896 x i8 ] , align 4
ret void
; This test case artificially produces register pressure which should force
; use of the emergency spill slot.
-define void @test_emergency_spill_slot(i32 %a) nounwind {
+define void @test_emergency_spill_slot(i32 %a) {
; RV32I-FPELIM-LABEL: test_emergency_spill_slot:
; RV32I-FPELIM: # %bb.0:
-; RV32I-FPELIM-NEXT: lui a1, 98
-; RV32I-FPELIM-NEXT: addi a1, a1, -1392
+; RV32I-FPELIM-NEXT: addi sp, sp, -2032
+; RV32I-FPELIM-NEXT: .cfi_def_cfa_offset 2032
+; RV32I-FPELIM-NEXT: sw s0, 2028(sp)
+; RV32I-FPELIM-NEXT: sw s1, 2024(sp)
+; RV32I-FPELIM-NEXT: .cfi_offset s0, -4
+; RV32I-FPELIM-NEXT: .cfi_offset s1, -8
+; RV32I-FPELIM-NEXT: lui a1, 97
+; RV32I-FPELIM-NEXT: addi a1, a1, 672
; RV32I-FPELIM-NEXT: sub sp, sp, a1
-; RV32I-FPELIM-NEXT: lui a1, 98
-; RV32I-FPELIM-NEXT: addi a1, a1, -1396
-; RV32I-FPELIM-NEXT: add a1, sp, a1
-; RV32I-FPELIM-NEXT: sw s0, 0(a1)
-; RV32I-FPELIM-NEXT: lui a1, 98
-; RV32I-FPELIM-NEXT: addi a1, a1, -1400
-; RV32I-FPELIM-NEXT: add a1, sp, a1
-; RV32I-FPELIM-NEXT: sw s1, 0(a1)
+; RV32I-FPELIM-NEXT: .cfi_def_cfa_offset 400016
; RV32I-FPELIM-NEXT: lui a1, 78
; RV32I-FPELIM-NEXT: addi a1, a1, 512
; RV32I-FPELIM-NEXT: addi a2, sp, 8
; RV32I-FPELIM-NEXT: #APP
; RV32I-FPELIM-NEXT: nop
; RV32I-FPELIM-NEXT: #NO_APP
-; RV32I-FPELIM-NEXT: lui a0, 98
-; RV32I-FPELIM-NEXT: addi a0, a0, -1400
-; RV32I-FPELIM-NEXT: add a0, sp, a0
-; RV32I-FPELIM-NEXT: lw s1, 0(a0)
-; RV32I-FPELIM-NEXT: lui a0, 98
-; RV32I-FPELIM-NEXT: addi a0, a0, -1396
-; RV32I-FPELIM-NEXT: add a0, sp, a0
-; RV32I-FPELIM-NEXT: lw s0, 0(a0)
-; RV32I-FPELIM-NEXT: lui a0, 98
-; RV32I-FPELIM-NEXT: addi a0, a0, -1392
+; RV32I-FPELIM-NEXT: lui a0, 97
+; RV32I-FPELIM-NEXT: addi a0, a0, 672
; RV32I-FPELIM-NEXT: add sp, sp, a0
+; RV32I-FPELIM-NEXT: .cfi_def_cfa_offset 2032
+; RV32I-FPELIM-NEXT: lw s1, 2024(sp)
+; RV32I-FPELIM-NEXT: lw s0, 2028(sp)
+; RV32I-FPELIM-NEXT: .cfi_restore s0
+; RV32I-FPELIM-NEXT: .cfi_restore s1
+; RV32I-FPELIM-NEXT: addi sp, sp, 2032
+; RV32I-FPELIM-NEXT: .cfi_def_cfa_offset 0
; RV32I-FPELIM-NEXT: ret
;
; RV32I-WITHFP-LABEL: test_emergency_spill_slot:
; RV32I-WITHFP: # %bb.0:
-; RV32I-WITHFP-NEXT: lui a1, 98
-; RV32I-WITHFP-NEXT: addi a1, a1, -1376
+; RV32I-WITHFP-NEXT: addi sp, sp, -2032
+; RV32I-WITHFP-NEXT: .cfi_def_cfa_offset 2032
+; RV32I-WITHFP-NEXT: sw ra, 2028(sp)
+; RV32I-WITHFP-NEXT: sw s0, 2024(sp)
+; RV32I-WITHFP-NEXT: sw s1, 2020(sp)
+; RV32I-WITHFP-NEXT: sw s2, 2016(sp)
+; RV32I-WITHFP-NEXT: .cfi_offset ra, -4
+; RV32I-WITHFP-NEXT: .cfi_offset s0, -8
+; RV32I-WITHFP-NEXT: .cfi_offset s1, -12
+; RV32I-WITHFP-NEXT: .cfi_offset s2, -16
+; RV32I-WITHFP-NEXT: addi s0, sp, 2032
+; RV32I-WITHFP-NEXT: .cfi_def_cfa s0, 0
+; RV32I-WITHFP-NEXT: lui a1, 97
+; RV32I-WITHFP-NEXT: addi a1, a1, 688
; RV32I-WITHFP-NEXT: sub sp, sp, a1
-; RV32I-WITHFP-NEXT: lui a1, 98
-; RV32I-WITHFP-NEXT: addi a1, a1, -1380
-; RV32I-WITHFP-NEXT: add a1, sp, a1
-; RV32I-WITHFP-NEXT: sw ra, 0(a1)
-; RV32I-WITHFP-NEXT: lui a1, 98
-; RV32I-WITHFP-NEXT: addi a1, a1, -1384
-; RV32I-WITHFP-NEXT: add a1, sp, a1
-; RV32I-WITHFP-NEXT: sw s0, 0(a1)
-; RV32I-WITHFP-NEXT: lui a1, 98
-; RV32I-WITHFP-NEXT: addi a1, a1, -1388
-; RV32I-WITHFP-NEXT: add a1, sp, a1
-; RV32I-WITHFP-NEXT: sw s1, 0(a1)
-; RV32I-WITHFP-NEXT: lui a1, 98
-; RV32I-WITHFP-NEXT: addi a1, a1, -1392
-; RV32I-WITHFP-NEXT: add a1, sp, a1
-; RV32I-WITHFP-NEXT: sw s2, 0(a1)
-; RV32I-WITHFP-NEXT: lui a1, 98
-; RV32I-WITHFP-NEXT: addi a1, a1, -1376
-; RV32I-WITHFP-NEXT: add s0, sp, a1
+; RV32I-WITHFP-NEXT: .cfi_def_cfa_offset 400032
; RV32I-WITHFP-NEXT: lui a1, 78
; RV32I-WITHFP-NEXT: addi a1, a1, 512
; RV32I-WITHFP-NEXT: lui a2, 1048478
; RV32I-WITHFP-NEXT: #APP
; RV32I-WITHFP-NEXT: nop
; RV32I-WITHFP-NEXT: #NO_APP
-; RV32I-WITHFP-NEXT: lui a0, 98
-; RV32I-WITHFP-NEXT: addi a0, a0, -1392
-; RV32I-WITHFP-NEXT: add a0, sp, a0
-; RV32I-WITHFP-NEXT: lw s2, 0(a0)
-; RV32I-WITHFP-NEXT: lui a0, 98
-; RV32I-WITHFP-NEXT: addi a0, a0, -1388
-; RV32I-WITHFP-NEXT: add a0, sp, a0
-; RV32I-WITHFP-NEXT: lw s1, 0(a0)
-; RV32I-WITHFP-NEXT: lui a0, 98
-; RV32I-WITHFP-NEXT: addi a0, a0, -1384
-; RV32I-WITHFP-NEXT: add a0, sp, a0
-; RV32I-WITHFP-NEXT: lw s0, 0(a0)
-; RV32I-WITHFP-NEXT: lui a0, 98
-; RV32I-WITHFP-NEXT: addi a0, a0, -1380
-; RV32I-WITHFP-NEXT: add a0, sp, a0
-; RV32I-WITHFP-NEXT: lw ra, 0(a0)
-; RV32I-WITHFP-NEXT: lui a0, 98
-; RV32I-WITHFP-NEXT: addi a0, a0, -1376
+; RV32I-WITHFP-NEXT: lui a0, 97
+; RV32I-WITHFP-NEXT: addi a0, a0, 688
; RV32I-WITHFP-NEXT: add sp, sp, a0
+; RV32I-WITHFP-NEXT: .cfi_def_cfa_offset 2032
+; RV32I-WITHFP-NEXT: lw s2, 2016(sp)
+; RV32I-WITHFP-NEXT: lw s1, 2020(sp)
+; RV32I-WITHFP-NEXT: lw s0, 2024(sp)
+; RV32I-WITHFP-NEXT: .cfi_def_cfa sp, 400032
+; RV32I-WITHFP-NEXT: lw ra, 2028(sp)
+; RV32I-WITHFP-NEXT: .cfi_restore ra
+; RV32I-WITHFP-NEXT: .cfi_restore s0
+; RV32I-WITHFP-NEXT: .cfi_restore s1
+; RV32I-WITHFP-NEXT: .cfi_restore s2
+; RV32I-WITHFP-NEXT: addi sp, sp, 2032
+; RV32I-WITHFP-NEXT: .cfi_def_cfa_offset 0
; RV32I-WITHFP-NEXT: ret
%data = alloca [ 100000 x i32 ] , align 4
%ptr = getelementptr inbounds [100000 x i32], [100000 x i32]* %data, i32 0, i32 80000
define void @foo() nounwind {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -2032
+; CHECK-NEXT: sd ra, 2024(sp)
; CHECK-NEXT: lui a0, 95
; CHECK-NEXT: addiw a0, a0, 1505
; CHECK-NEXT: slli a0, a0, 13
-; CHECK-NEXT: addi a0, a0, 32
+; CHECK-NEXT: addi a0, a0, -2000
; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: lui a0, 781250
-; CHECK-NEXT: addiw a0, a0, 24
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: sd ra, 0(a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: call baz
-; CHECK-NEXT: lui a0, 781250
-; CHECK-NEXT: addiw a0, a0, 24
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: ld ra, 0(a0)
; CHECK-NEXT: lui a0, 95
; CHECK-NEXT: addiw a0, a0, 1505
; CHECK-NEXT: slli a0, a0, 13
-; CHECK-NEXT: addi a0, a0, 32
+; CHECK-NEXT: addi a0, a0, -2000
; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: ld ra, 2024(sp)
+; CHECK-NEXT: addi sp, sp, 2032
; CHECK-NEXT: ret
entry:
%w = alloca [100000000 x { fp128, fp128 }], align 16
--- /dev/null
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s -check-prefix=RV32I
+
+; The stack size is 2048 and the SP adjustment will be split.
+define i32 @SplitSP() nounwind {
+; RV32I-LABEL: SplitSP:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -2032
+; RV32I-NEXT: sw ra, 2028(sp)
+; RV32I-NEXT: addi sp, sp, -16
+; RV32I-NEXT: addi a0, sp, 16
+; RV32I-NEXT: call foo
+; RV32I-NEXT: mv a0, zero
+; RV32I-NEXT: addi sp, sp, 16
+; RV32I-NEXT: lw ra, 2028(sp)
+; RV32I-NEXT: addi sp, sp, 2032
+; RV32I-NEXT: ret
+entry:
+ %xx = alloca [2028 x i8], align 1
+ %0 = getelementptr inbounds [2028 x i8], [2028 x i8]* %xx, i32 0, i32 0
+ %call = call i32 @foo(i8* nonnull %0)
+ ret i32 0
+}
+
+; The stack size is 2032 and the SP adjustment will not be split.
+define i32 @NoSplitSP() nounwind {
+; RV32I-LABEL: NoSplitSP:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: addi sp, sp, -2032
+; RV32I-NEXT: sw ra, 2028(sp)
+; RV32I-NEXT: addi a0, sp, 4
+; RV32I-NEXT: call foo
+; RV32I-NEXT: mv a0, zero
+; RV32I-NEXT: lw ra, 2028(sp)
+; RV32I-NEXT: addi sp, sp, 2032
+; RV32I-NEXT: ret
+entry:
+ %xx = alloca [2024 x i8], align 1
+ %0 = getelementptr inbounds [2024 x i8], [2024 x i8]* %xx, i32 0, i32 0
+ %call = call i32 @foo(i8* nonnull %0)
+ ret i32 0
+}
+
+declare i32 @foo(i8*)
define void @caller1024() nounwind {
; RV32I-LABEL: caller1024:
; RV32I: # %bb.0:
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -1024
-; RV32I-NEXT: sub sp, sp, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -1028
-; RV32I-NEXT: add a0, sp, a0
-; RV32I-NEXT: sw ra, 0(a0)
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -1032
-; RV32I-NEXT: add a0, sp, a0
-; RV32I-NEXT: sw s0, 0(a0)
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -1024
-; RV32I-NEXT: add s0, sp, a0
+; RV32I-NEXT: addi sp, sp, -2032
+; RV32I-NEXT: sw ra, 2028(sp)
+; RV32I-NEXT: sw s0, 2024(sp)
+; RV32I-NEXT: addi s0, sp, 2032
+; RV32I-NEXT: addi sp, sp, -1040
; RV32I-NEXT: andi sp, sp, -1024
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: addi a0, a0, -2048
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: addi a0, a0, -1024
; RV32I-NEXT: sub sp, s0, a0
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -1032
-; RV32I-NEXT: add a0, sp, a0
-; RV32I-NEXT: lw s0, 0(a0)
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -1028
-; RV32I-NEXT: add a0, sp, a0
-; RV32I-NEXT: lw ra, 0(a0)
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, -1024
-; RV32I-NEXT: add sp, sp, a0
+; RV32I-NEXT: addi sp, sp, 1040
+; RV32I-NEXT: lw s0, 2024(sp)
+; RV32I-NEXT: lw ra, 2028(sp)
+; RV32I-NEXT: addi sp, sp, 2032
; RV32I-NEXT: ret
;
; RV64I-LABEL: caller1024:
; RV64I: # %bb.0:
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -1024
-; RV64I-NEXT: sub sp, sp, a0
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -1032
-; RV64I-NEXT: add a0, sp, a0
-; RV64I-NEXT: sd ra, 0(a0)
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -1040
-; RV64I-NEXT: add a0, sp, a0
-; RV64I-NEXT: sd s0, 0(a0)
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -1024
-; RV64I-NEXT: add s0, sp, a0
+; RV64I-NEXT: addi sp, sp, -2032
+; RV64I-NEXT: sd ra, 2024(sp)
+; RV64I-NEXT: sd s0, 2016(sp)
+; RV64I-NEXT: addi s0, sp, 2032
+; RV64I-NEXT: addi sp, sp, -1040
; RV64I-NEXT: andi sp, sp, -1024
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addiw a0, a0, -2048
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addiw a0, a0, -1024
; RV64I-NEXT: sub sp, s0, a0
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -1040
-; RV64I-NEXT: add a0, sp, a0
-; RV64I-NEXT: ld s0, 0(a0)
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -1032
-; RV64I-NEXT: add a0, sp, a0
-; RV64I-NEXT: ld ra, 0(a0)
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, -1024
-; RV64I-NEXT: add sp, sp, a0
+; RV64I-NEXT: addi sp, sp, 1040
+; RV64I-NEXT: ld s0, 2016(sp)
+; RV64I-NEXT: ld ra, 2024(sp)
+; RV64I-NEXT: addi sp, sp, 2032
; RV64I-NEXT: ret
%1 = alloca i8, align 1024
call void @callee(i8* %1)
define void @caller2048() nounwind {
; RV32I-LABEL: caller2048:
; RV32I: # %bb.0:
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: addi a0, a0, -2048
-; RV32I-NEXT: sub sp, sp, a0
+; RV32I-NEXT: addi sp, sp, -2032
+; RV32I-NEXT: sw ra, 2028(sp)
+; RV32I-NEXT: sw s0, 2024(sp)
+; RV32I-NEXT: addi s0, sp, 2032
; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, 2044
-; RV32I-NEXT: add a0, sp, a0
-; RV32I-NEXT: sw ra, 0(a0)
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, 2040
-; RV32I-NEXT: add a0, sp, a0
-; RV32I-NEXT: sw s0, 0(a0)
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: addi a0, a0, -2048
-; RV32I-NEXT: add s0, sp, a0
+; RV32I-NEXT: addi a0, a0, 16
+; RV32I-NEXT: sub sp, sp, a0
; RV32I-NEXT: andi sp, sp, -2048
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: add a0, sp, a0
; RV32I-NEXT: addi a0, a0, -2048
; RV32I-NEXT: sub sp, s0, a0
; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, 2040
-; RV32I-NEXT: add a0, sp, a0
-; RV32I-NEXT: lw s0, 0(a0)
-; RV32I-NEXT: lui a0, 1
-; RV32I-NEXT: addi a0, a0, 2044
-; RV32I-NEXT: add a0, sp, a0
-; RV32I-NEXT: lw ra, 0(a0)
-; RV32I-NEXT: lui a0, 2
-; RV32I-NEXT: addi a0, a0, -2048
+; RV32I-NEXT: addi a0, a0, 16
; RV32I-NEXT: add sp, sp, a0
+; RV32I-NEXT: lw s0, 2024(sp)
+; RV32I-NEXT: lw ra, 2028(sp)
+; RV32I-NEXT: addi sp, sp, 2032
; RV32I-NEXT: ret
;
; RV64I-LABEL: caller2048:
; RV64I: # %bb.0:
-; RV64I-NEXT: lui a0, 2
-; RV64I-NEXT: addiw a0, a0, -2048
-; RV64I-NEXT: sub sp, sp, a0
+; RV64I-NEXT: addi sp, sp, -2032
+; RV64I-NEXT: sd ra, 2024(sp)
+; RV64I-NEXT: sd s0, 2016(sp)
+; RV64I-NEXT: addi s0, sp, 2032
; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, 2040
-; RV64I-NEXT: add a0, sp, a0
-; RV64I-NEXT: sd ra, 0(a0)
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, 2032
-; RV64I-NEXT: add a0, sp, a0
-; RV64I-NEXT: sd s0, 0(a0)
-; RV64I-NEXT: lui a0, 2
-; RV64I-NEXT: addiw a0, a0, -2048
-; RV64I-NEXT: add s0, sp, a0
+; RV64I-NEXT: addiw a0, a0, 16
+; RV64I-NEXT: sub sp, sp, a0
; RV64I-NEXT: andi sp, sp, -2048
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: add a0, sp, a0
; RV64I-NEXT: addiw a0, a0, -2048
; RV64I-NEXT: sub sp, s0, a0
; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, 2032
-; RV64I-NEXT: add a0, sp, a0
-; RV64I-NEXT: ld s0, 0(a0)
-; RV64I-NEXT: lui a0, 1
-; RV64I-NEXT: addiw a0, a0, 2040
-; RV64I-NEXT: add a0, sp, a0
-; RV64I-NEXT: ld ra, 0(a0)
-; RV64I-NEXT: lui a0, 2
-; RV64I-NEXT: addiw a0, a0, -2048
+; RV64I-NEXT: addiw a0, a0, 16
; RV64I-NEXT: add sp, sp, a0
+; RV64I-NEXT: ld s0, 2016(sp)
+; RV64I-NEXT: ld ra, 2024(sp)
+; RV64I-NEXT: addi sp, sp, 2032
; RV64I-NEXT: ret
%1 = alloca i8, align 2048
call void @callee(i8* %1)
define void @caller4096() nounwind {
; RV32I-LABEL: caller4096:
; RV32I: # %bb.0:
+; RV32I-NEXT: addi sp, sp, -2032
+; RV32I-NEXT: sw ra, 2028(sp)
+; RV32I-NEXT: sw s0, 2024(sp)
+; RV32I-NEXT: addi s0, sp, 2032
; RV32I-NEXT: lui a0, 3
+; RV32I-NEXT: addi a0, a0, -2032
; RV32I-NEXT: sub sp, sp, a0
-; RV32I-NEXT: lui a0, 3
-; RV32I-NEXT: addi a0, a0, -4
-; RV32I-NEXT: add a0, sp, a0
-; RV32I-NEXT: sw ra, 0(a0)
-; RV32I-NEXT: lui a0, 3
-; RV32I-NEXT: addi a0, a0, -8
-; RV32I-NEXT: add a0, sp, a0
-; RV32I-NEXT: sw s0, 0(a0)
-; RV32I-NEXT: lui a0, 3
-; RV32I-NEXT: add s0, sp, a0
; RV32I-NEXT: srli a0, sp, 12
; RV32I-NEXT: slli sp, a0, 12
; RV32I-NEXT: lui a0, 2
; RV32I-NEXT: lui a0, 3
; RV32I-NEXT: sub sp, s0, a0
; RV32I-NEXT: lui a0, 3
-; RV32I-NEXT: addi a0, a0, -8
-; RV32I-NEXT: add a0, sp, a0
-; RV32I-NEXT: lw s0, 0(a0)
-; RV32I-NEXT: lui a0, 3
-; RV32I-NEXT: addi a0, a0, -4
-; RV32I-NEXT: add a0, sp, a0
-; RV32I-NEXT: lw ra, 0(a0)
-; RV32I-NEXT: lui a0, 3
+; RV32I-NEXT: addi a0, a0, -2032
; RV32I-NEXT: add sp, sp, a0
+; RV32I-NEXT: lw s0, 2024(sp)
+; RV32I-NEXT: lw ra, 2028(sp)
+; RV32I-NEXT: addi sp, sp, 2032
; RV32I-NEXT: ret
;
; RV64I-LABEL: caller4096:
; RV64I: # %bb.0:
+; RV64I-NEXT: addi sp, sp, -2032
+; RV64I-NEXT: sd ra, 2024(sp)
+; RV64I-NEXT: sd s0, 2016(sp)
+; RV64I-NEXT: addi s0, sp, 2032
; RV64I-NEXT: lui a0, 3
+; RV64I-NEXT: addiw a0, a0, -2032
; RV64I-NEXT: sub sp, sp, a0
-; RV64I-NEXT: lui a0, 3
-; RV64I-NEXT: addiw a0, a0, -8
-; RV64I-NEXT: add a0, sp, a0
-; RV64I-NEXT: sd ra, 0(a0)
-; RV64I-NEXT: lui a0, 3
-; RV64I-NEXT: addiw a0, a0, -16
-; RV64I-NEXT: add a0, sp, a0
-; RV64I-NEXT: sd s0, 0(a0)
-; RV64I-NEXT: lui a0, 3
-; RV64I-NEXT: add s0, sp, a0
; RV64I-NEXT: srli a0, sp, 12
; RV64I-NEXT: slli sp, a0, 12
; RV64I-NEXT: lui a0, 2
; RV64I-NEXT: lui a0, 3
; RV64I-NEXT: sub sp, s0, a0
; RV64I-NEXT: lui a0, 3
-; RV64I-NEXT: addiw a0, a0, -16
-; RV64I-NEXT: add a0, sp, a0
-; RV64I-NEXT: ld s0, 0(a0)
-; RV64I-NEXT: lui a0, 3
-; RV64I-NEXT: addiw a0, a0, -8
-; RV64I-NEXT: add a0, sp, a0
-; RV64I-NEXT: ld ra, 0(a0)
-; RV64I-NEXT: lui a0, 3
+; RV64I-NEXT: addiw a0, a0, -2032
; RV64I-NEXT: add sp, sp, a0
+; RV64I-NEXT: ld s0, 2016(sp)
+; RV64I-NEXT: ld ra, 2024(sp)
+; RV64I-NEXT: addi sp, sp, 2032
; RV64I-NEXT: ret
%1 = alloca i8, align 4096
call void @callee(i8* %1)