auto &HRI = *HST.getRegisterInfo();
DebugLoc dl;
- unsigned MaxAlign = std::max(MFI.getMaxAlignment(), getStackAlignment());
+ unsigned MaxAlign = std::max(getMaxStackAlignment(MF), getStackAlignment());
// Calculate the total stack frame size.
// Get the number of bytes to allocate from the FrameInfo.
}
}
+// Compute the largest alignment among the stack objects that are not dead.
+//
+// MachineFrameInfo's own MaxAlignment can only grow, whereas the alignment of
+// vector spill slots may be reduced, so the value is recomputed here from the
+// individual objects. Only object indices from 0 up to getObjectIndexEnd()
+// are visited; the result is 0 when none of them is live.
+unsigned
+HexagonFrameLowering::getMaxStackAlignment(const MachineFunction &MF) const {
+  const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+  unsigned Largest = 0;
+  for (int Idx = 0, End = FrameInfo.getObjectIndexEnd(); Idx != End; ++Idx) {
+    // Dead objects do not contribute to the result.
+    if (!FrameInfo.isDeadObjectIndex(Idx)) {
+      const unsigned ObjAlign = FrameInfo.getObjectAlignment(Idx);
+      if (ObjAlign > Largest)
+        Largest = ObjAlign;
+    }
+  }
+  return Largest;
+}
+
bool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();
if (!MFI.hasVarSizedObjects())
return false;
- unsigned MaxA = MFI.getMaxAlignment();
+ unsigned MaxA = getMaxStackAlignment(MF);
if (MaxA <= getStackAlignment())
return false;
return true;
const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI)
const override;
+ unsigned getMaxStackAlignment(const MachineFunction &MF) const;
+
bool needsAligna(const MachineFunction &MF) const;
const MachineInstr *getAlignaInstr(const MachineFunction &MF) const;
return MF.getSubtarget<HexagonSubtarget>().getFrameLowering()->hasFP(MF);
}
+// Returns true when the maximum stack alignment recomputed by
+// HexagonFrameLowering::getMaxStackAlignment exceeds the default stack
+// alignment. MachineFrameInfo's own maximum is deliberately not consulted:
+// the stack alignment on Hexagon can actually decrease in some cases,
+// specifically in some subset of cases when a variable-sized stack object
+// is present. The issue is two-fold.
+// First, if there is a variable-sized object and the stack needs extra
+// alignment (due to pre-existing local objects), then a special register
+// will be reserved up front, acting as the aligned stack pointer (call it
+// AP). This register is only guaranteed to be live for accessing these
+// pre-existing local objects (the ones with the higher alignment). Now, if
+// the register allocator introduces vector register spills, their spill
+// slots will initially have an alignment equal to the register size, which
+// is higher than the normal stack alignment. Ideally, they should be
+// loaded/stored using AP, but AP may not be available at all required
+// places. To avoid this issue, the vector spill slots will have their
+// alignment lowered to 8, and they will be loaded/stored using unaligned
+// instructions.
+//
+// Second, this lowering may happen if the stack had a variable-sized object
+// but otherwise retained its default alignment up until register allocation.
+// A vector spill introduced by the register allocator makes the max stack
+// alignment inside MachineFrameInfo grow; resetting the spills to the default
+// alignment does not undo that (MFI's value cannot be lowered), so relying
+// on it during frame lowering would cause an unnecessary stack realignment.
+bool HexagonRegisterInfo::needsStackRealignment(
+    const MachineFunction &MF) const {
+  const auto *FrameLowering =
+      MF.getSubtarget<HexagonSubtarget>().getFrameLowering();
+  // Alignment actually required by the live stack objects.
+  const unsigned Required = FrameLowering->getMaxStackAlignment(MF);
+  const auto DefaultAlign = FrameLowering->getStackAlignment();
+  return Required > DefaultAlign;
+}
unsigned HexagonRegisterInfo::getFirstCallerSavedNonParamReg() const {
return Hexagon::R6;
return true;
}
+ bool needsStackRealignment(const MachineFunction &MF) const;
+
/// Returns true if the frame pointer is valid.
bool useFPForScavengingIndex(const MachineFunction &MF) const override;
--- /dev/null
+; RUN: llc -march=hexagon < %s | FileCheck %s
+;
+; This used to crash.
+; CHECK: call f1
+
+target triple = "hexagon-unknown--elf"
+
+%struct.0 = type { [5 x i32] }
+%struct.2 = type { i32, i32, i32, %struct.1* }
+%struct.1 = type { i16*, i32, i32, i32 }
+
+@g0 = external hidden unnamed_addr constant [52 x i8], align 1
+@g1 = external hidden unnamed_addr constant [3 x i8], align 1
+
+declare extern_weak void @f0(i32, i8*, i32, i8*, ...) #0
+declare void @f1(%struct.0*, i32) #0
+
+; Reduced reproducer; control flow and undef operands are left exactly as
+; reduced. It combines an alloca with a non-constant element count (%v3)
+; with an HVX intrinsic call (in b7) and calls to @f0/@f1.
+; NOTE(review): presumably this is the variable-sized-object plus HVX vector
+; situation that used to crash during frame lowering -- confirm against the
+; original bug report.
+define void @fred(i8* %a0) #0 {
+b1:
+ %v2 = alloca %struct.0, align 4
+ ; The element count below is undef rather than a constant.
+ %v3 = alloca %struct.2, i32 undef, align 8
+ br i1 undef, label %b5, label %b4
+
+b4: ; preds = %b1
+ br label %b7
+
+b5: ; preds = %b5, %b1
+ %v6 = getelementptr inbounds %struct.2, %struct.2* %v3, i32 undef, i32 3
+ store %struct.1* undef, %struct.1** %v6, align 4
+ br label %b5
+
+b7: ; preds = %b10, %b4
+ %v8 = call i32 @llvm.hexagon.V6.extractw(<16 x i32> zeroinitializer, i32 0)
+ ; @f0 is declared extern_weak; its address is compared against null here.
+ br i1 icmp eq (void (i32, i8*, i32, i8*, ...)* @f0, void (i32, i8*, i32, i8*, ...)* null), label %b11, label %b9
+
+b9: ; preds = %b7
+ call void (i32, i8*, i32, i8*, ...) @f0(i32 2, i8* getelementptr inbounds ([52 x i8], [52 x i8]* @g0, i32 0, i32 0), i32 2346, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @g1, i32 0, i32 0), i32 %v8)
+ unreachable
+
+b10: ; preds = %b11
+ ; This is the call the CHECK line at the top of the test looks for.
+ call void @f1(%struct.0* nonnull %v2, i32 28)
+ br label %b7
+
+b11: ; preds = %b11, %b7
+ br i1 undef, label %b10, label %b11
+}
+
+declare i32 @llvm.hexagon.V6.extractw(<16 x i32>, i32) #1
+
+attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,-hvx-double" }
+attributes #1 = { nounwind readnone }