From: Luo, Yuanke Date: Sat, 10 Aug 2019 02:49:02 +0000 (+0000) Subject: [X86] Fix stack probe issue on windows32. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=81af45aba0fdfae824dd37b2b80b283adf3ac961;p=llvm [X86] Fix stack probe issue on windows32. Summary: On Windows, if the frame size exceeds 4096 bytes, the compiler needs to generate a call to _alloca_probe. The X86CallFrameOptimization pass changes the reserved stack size, which causes the stack probe function not to be inserted. This patch fixes the issue by checking the call frame size: if the size exceeds 4096 bytes, X86CallFrameOptimization is skipped. Reviewers: craig.topper, wxiao3, annita.zhang, rnk, RKSimon Reviewed By: rnk Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D65923 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@368503 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86CallFrameOptimization.cpp b/lib/Target/X86/X86CallFrameOptimization.cpp index 4b6f5aba9f0..7796945f1c3 100644 --- a/lib/Target/X86/X86CallFrameOptimization.cpp +++ b/lib/Target/X86/X86CallFrameOptimization.cpp @@ -155,12 +155,22 @@ bool X86CallFrameOptimization::isLegal(MachineFunction &MF) { // This is bad, and breaks SP adjustment. // So, check that all of the frames in the function are closed inside // the same block, and, for good measure, that there are no nested frames. + // + // If any call allocates more argument stack memory than the stack + // probe size, don't do this optimization. Otherwise, this pass + // would need to synthesize additional stack probe calls to allocate + // memory for arguments. 
unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode(); unsigned FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); + bool UseStackProbe = + !STI->getTargetLowering()->getStackProbeSymbolName(MF).empty(); + unsigned StackProbeSize = STI->getTargetLowering()->getStackProbeSize(MF); for (MachineBasicBlock &BB : MF) { bool InsideFrameSequence = false; for (MachineInstr &MI : BB) { if (MI.getOpcode() == FrameSetupOpcode) { + if (TII->getFrameSize(MI) >= StackProbeSize && UseStackProbe) + return false; if (InsideFrameSequence) return false; InsideFrameSequence = true; diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index aff686e9cae..47be92e5972 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -1022,14 +1022,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); bool UseStackProbe = !STI.getTargetLowering()->getStackProbeSymbolName(MF).empty(); - - // The default stack probe size is 4096 if the function has no stackprobesize - // attribute. - unsigned StackProbeSize = 4096; - if (Fn.hasFnAttribute("stack-probe-size")) - Fn.getFnAttribute("stack-probe-size") - .getValueAsString() - .getAsInteger(0, StackProbeSize); + unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF); // Re-align the stack on 64-bit if the x86-interrupt calling convention is // used and an error code was pushed, since the x86-64 ABI requires a 16-byte diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2d3581fe403..0eb8320d5c5 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -44970,3 +44970,16 @@ X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const { return Subtarget.isTargetCygMing() ? "___chkstk_ms" : "__chkstk"; return Subtarget.isTargetCygMing() ? 
"_alloca" : "_chkstk"; } + +unsigned +X86TargetLowering::getStackProbeSize(MachineFunction &MF) const { + // The default stack probe size is 4096 if the function has no stackprobesize + // attribute. + unsigned StackProbeSize = 4096; + const Function &Fn = MF.getFunction(); + if (Fn.hasFnAttribute("stack-probe-size")) + Fn.getFnAttribute("stack-probe-size") + .getValueAsString() + .getAsInteger(0, StackProbeSize); + return StackProbeSize; +} diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 8dc58a188dd..09b0f6bc42b 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -1207,6 +1207,8 @@ namespace llvm { StringRef getStackProbeSymbolName(MachineFunction &MF) const override; + unsigned getStackProbeSize(MachineFunction &MF) const; + bool hasVectorBlend() const override { return true; } unsigned getMaxSupportedInterleaveFactor() const override { return 4; } diff --git a/test/CodeGen/X86/nomovtopush.ll b/test/CodeGen/X86/nomovtopush.ll new file mode 100644 index 00000000000..f690c2377dd --- /dev/null +++ b/test/CodeGen/X86/nomovtopush.ll @@ -0,0 +1,59 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-pc-windows-msvc | FileCheck %s + +target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" +target triple = "i386-pc-windows-msvc" + +%struct._param_str = type { i32, i32, [4096 x i32], i32 } + +@g_d = common dso_local local_unnamed_addr global i32 0, align 4 +@g_c = common dso_local local_unnamed_addr global i32 0, align 4 +@g_b = common dso_local local_unnamed_addr global i32 0, align 4 +@g_a = common dso_local local_unnamed_addr global i32 0, align 4 +@g_param = common dso_local global %struct._param_str zeroinitializer, align 4 + +; Function Attrs: nounwind +define dso_local i32 @test() local_unnamed_addr { +; CHECK-LABEL: test: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: 
movl $16396, %eax # imm = 0x400C +; CHECK-NEXT: calll __chkstk +; CHECK-NEXT: movl _g_d, %eax +; CHECK-NEXT: movl _g_c, %ecx +; CHECK-NEXT: movl _g_b, %edx +; CHECK-NEXT: movl _g_a, %esi +; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %edx, {{[0-9]+}}(%esp) +; CHECK-NEXT: movl %esi, (%esp) +; CHECK-NEXT: calll _bar +; CHECK-NEXT: movl $4099, %ecx # imm = 0x1003 +; CHECK-NEXT: movl %esp, %edi +; CHECK-NEXT: movl $_g_param, %esi +; CHECK-NEXT: rep;movsl (%esi), %es:(%edi) +; CHECK-NEXT: calll _foo +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: addl $16396, %esp # imm = 0x400C +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: retl +entry: + %0 = load i32, i32* @g_d, align 4, !tbaa !3 + %1 = load i32, i32* @g_c, align 4, !tbaa !3 + %2 = load i32, i32* @g_b, align 4, !tbaa !3 + %3 = load i32, i32* @g_a, align 4, !tbaa !3 + %call = tail call i32 @bar(i32 %3, i32 %2, i32 %1, i32 %0) #2 + tail call void @foo(%struct._param_str* byval nonnull align 4 @g_param) #2 + ret i32 0 +} + +declare dso_local i32 @bar(i32, i32, i32, i32) local_unnamed_addr + +declare dso_local void @foo(%struct._param_str* byval align 4) local_unnamed_addr + +!3 = !{!4, !4, i64 0} +!4 = !{!"int", !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"}