++I;
unsigned StackPtr = RegInfo.getStackRegister();
+ auto StackPtrCopyInst = MBB.end();
// SelectionDAG (but not FastISel) inserts a copy of ESP into a virtual
- // register here. If it's there, use that virtual register as stack pointer
- // instead.
- if (I->isCopy() && I->getOperand(0).isReg() && I->getOperand(1).isReg() &&
- I->getOperand(1).getReg() == StackPtr) {
- Context.SPCopy = &*I++;
- StackPtr = Context.SPCopy->getOperand(0).getReg();
- }
+ // register. If it's there, use that virtual register as stack pointer
+ // instead. Also, we need to locate this instruction so that we can later
+ // safely ignore it while doing the conservative processing of the call chain.
+ // The COPY can be located anywhere between the call-frame setup
+ // instruction and its first use. We use the call instruction as a boundary
+ // because it is usually cheaper to check if an instruction is a call than
+ // checking if an instruction uses a register.
+ for (auto J = I; !J->isCall(); ++J)
+ if (J->isCopy() && J->getOperand(0).isReg() && J->getOperand(1).isReg() &&
+ J->getOperand(1).getReg() == StackPtr) {
+ StackPtrCopyInst = J;
+ Context.SPCopy = &*J++;
+ StackPtr = Context.SPCopy->getOperand(0).getReg();
+ break;
+ }
// Scan the call setup sequence for the pattern we're looking for.
// We only handle a simple case - a sequence of store instructions that
if (MaxAdjust > 4)
Context.MovVector.resize(MaxAdjust, nullptr);
- InstClassification Classification;
DenseSet<unsigned int> UsedRegs;
- while ((Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs)) !=
- Exit) {
- if (Classification == Skip) {
- ++I;
+ for (InstClassification Classification = Skip; Classification != Exit; ++I) {
+ // If this is the COPY of the stack pointer, it's ok to ignore.
+ if (I == StackPtrCopyInst)
+ continue;
+ Classification = classifyInstruction(MBB, I, RegInfo, UsedRegs);
+ if (Classification != Convert)
continue;
- }
-
// We know the instruction has a supported store opcode.
// We only want movs of the form:
// mov imm/reg, k(%StackPtr)
if (RegInfo.isPhysicalRegister(Reg))
UsedRegs.insert(Reg);
}
-
- ++I;
}
+ --I;
+
// We now expect the end of the sequence. If we stopped early,
// or reached the end of the block without finding a call, bail.
if (I == MBB.end() || !I->isCall())
; i386-NEXT: sahf
; i386-NEXT: jne
+; In the following case we get a long chain of EFLAGS save/restore due to
+; a sequence of:
+; cmpxchg8b (implicit-def eflags)
+; eax = copy eflags
+; adjcallstackdown32
+; ...
+; use of eax
+; During PEI the adjcallstackdown32 is replaced with the subl, which
+; clobbers eflags, effectively interfering with the liveness interval.
+; Is this a case we care about? Maybe not, considering this issue
+; only happens when the fast pre-regalloc scheduler is enforced. A more
+; performant scheduler would move the adjcallstackdown32 out of the
+; eflags liveness interval.
+
; i386f-LABEL: test_intervening_call:
; i386f: cmpxchg8b
-; i386f-NEXT: movl %eax, (%esp)
-; i386f-NEXT: movl %edx, 4(%esp)
-; i386f-NEXT: seto %al
+; i386f-NEXT: pushl %eax
+; i386f-NEXT: seto %al
; i386f-NEXT: lahf
-; i386f-NEXT: movl %eax, [[FLAGS:%.*]]
-; i386f-NEXT: calll bar
-; i386f-NEXT: movl [[FLAGS]], %eax
-; i386f-NEXT: addb $127, %al
+; i386f-NEXT: movl %eax, [[FLAGS:%.*]]
+; i386f-NEXT: popl %eax
+; i386f-NEXT: subl $8, %esp
+; i386f-NEXT: pushl %eax
+; i386f-NEXT: movl %ecx, %eax
+; i386f-NEXT: addb $127, %al
; i386f-NEXT: sahf
-; i386f-NEXT: jne
+; i386f-NEXT: popl %eax
+; i386f-NEXT: pushl %eax
+; i386f-NEXT: seto %al
+; i386f-NEXT: lahf
+; i386f-NEXT: movl %eax, %esi
+; i386f-NEXT: popl %eax
+; i386f-NEXT: pushl %edx
+; i386f-NEXT: pushl %eax
+; i386f-NEXT: calll bar
+; i386f-NEXT: addl $16, %esp
+; i386f-NEXT: movl %esi, %eax
+; i386f-NEXT: addb $127, %al
; x8664-LABEL: test_intervening_call:
; x8664: cmpxchgq
; NORMAL-NEXT: pushl $2
; NORMAL-NEXT: pushl $1
; NORMAL-NEXT: call
-; NORMAL-NEXT: subl $4, %esp
-; NORMAL-NEXT: movl 20(%esp), [[E1:%e..]]
-; NORMAL-NEXT: movl 24(%esp), [[E2:%e..]]
-; NORMAL-NEXT: movl [[E2]], 4(%esp)
-; NORMAL-NEXT: movl [[E1]], (%esp)
-; NORMAL-NEXT: leal 32(%esp), [[E3:%e..]]
-; NORMAL-NEXT: movl [[E3]], 16(%esp)
-; NORMAL-NEXT: leal 28(%esp), [[E4:%e..]]
-; NORMAL-NEXT: movl [[E4]], 12(%esp)
-; NORMAL-NEXT: movl $6, 8(%esp)
+; NORMAL-NEXT: addl $16, %esp
+; NORMAL-NEXT: movl (%esp), [[E1:%e..]]
+; NORMAL-NEXT: movl 4(%esp), [[E2:%e..]]
+; NORMAL-NEXT: leal 16(%esp), [[E3:%e..]]
+; NORMAL-NEXT: leal 12(%esp), [[E4:%e..]]
+; NORMAL-NEXT: pushl [[E3]]
+; NORMAL-NEXT: pushl [[E4]]
+; NORMAL-NEXT: pushl $6
+; NORMAL-NEXT: pushl [[E2]]
+; NORMAL-NEXT: pushl [[E1]]
; NORMAL-NEXT: call
; NORMAL-NEXT: addl $20, %esp
define void @test9() optsize {
; Converting one mov into a push isn't worth it when
; doing so forces too much overhead for other calls.
; NORMAL-LABEL: test12:
-; NORMAL: movl $8, 12(%esp)
-; NORMAL-NEXT: movl $7, 8(%esp)
-; NORMAL-NEXT: movl $6, 4(%esp)
-; NORMAL-NEXT: movl $5, (%esp)
+; NORMAL: pushl $8
+; NORMAL-NEXT: pushl $7
+; NORMAL-NEXT: pushl $6
+; NORMAL-NEXT: pushl $5
; NORMAL-NEXT: calll _good
define void @test12() optsize {
entry:
; NORMAL-NEXT: pushl $2
; NORMAL-NEXT: pushl $1
; NORMAL-NEXT: calll _good
-; NORMAL-NEXT: subl $4, %esp
-; NORMAL: movl $8, 16(%esp)
-; NORMAL-NEXT: movl $7, 12(%esp)
-; NORMAL-NEXT: movl $6, 8(%esp)
-; NORMAL-NEXT: calll _struct
-; NORMAL-NEXT: addl $20, %esp
-; NORMAL-NEXT: pushl $12
-; NORMAL-NEXT: pushl $11
-; NORMAL-NEXT: pushl $10
-; NORMAL-NEXT: pushl $9
-; NORMAL-NEXT: calll _good
-; NORMAL-NEXT: addl $16, %esp
+; NORMAL-NEXT: addl $16, %esp
+; NORMAL-NEXT: movl (%esp), %eax
+; NORMAL-NEXT: movl 4(%esp), %ecx
+; NORMAL-NEXT: pushl $8
+; NORMAL-NEXT: pushl $7
+; NORMAL-NEXT: pushl $6
+; NORMAL-NEXT: pushl %ecx
+; NORMAL-NEXT: pushl %eax
+; NORMAL-NEXT: calll _struct
+; NORMAL-NEXT: addl $20, %esp
+; NORMAL-NEXT: pushl $12
+; NORMAL-NEXT: pushl $11
+; NORMAL-NEXT: pushl $10
+; NORMAL-NEXT: pushl $9
+; NORMAL-NEXT: calll _good
+; NORMAL-NEXT: addl $16, %esp
define void @test12b() optsize {
entry:
%s = alloca %struct.s, align 4
# RUN: llc -mtriple=i686-windows --run-pass="x86-cf-opt" %s -o - | FileCheck %s
+# PR34903
--- |
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
target triple = "i686--windows-msvc"
# CHECK-NEXT: PUSH32i8 1, implicit-def %esp, implicit %esp
# CHECK-NEXT: CALLpcrel32 @good, csr_32, implicit %esp, implicit-def %esp
# CHECK-NEXT: ADJCALLSTACKUP32 16, 0, implicit-def dead %esp, implicit-def dead %eflags, implicit %esp
-# CHECK-NEXT: ADJCALLSTACKDOWN32 20, 0, 0, implicit-def dead %esp, implicit-def dead %eflags, implicit %esp
+# CHECK-NEXT: ADJCALLSTACKDOWN32 20, 0, 20, implicit-def dead %esp, implicit-def dead %eflags, implicit %esp
# CHECK-NEXT: %1 = MOV32rm %stack.2.s, 1, _, 0, _ :: (load 4 from %stack.2.s, align 8)
# CHECK-NEXT: %2 = MOV32rm %stack.2.s, 1, _, 4, _ :: (load 4 from %stack.2.s + 4)
-# CHECK-NEXT: %3 = COPY %esp
-# CHECK-NEXT: MOV32mr %3, 1, _, 4, _, killed %2 :: (store 4)
-# CHECK-NEXT: MOV32mr %3, 1, _, 0, _, killed %1 :: (store 4)
# CHECK-NEXT: %4 = LEA32r %stack.0.p, 1, _, 0, _
-# CHECK-NEXT: MOV32mr %3, 1, _, 16, _, killed %4 :: (store 4 into stack + 16)
# CHECK-NEXT: %5 = LEA32r %stack.1.q, 1, _, 0, _
-# CHECK-NEXT: MOV32mr %3, 1, _, 12, _, killed %5 :: (store 4 into stack + 12)
-# CHECK-NEXT: MOV32mi %3, 1, _, 8, _, 6 :: (store 4 into stack + 8)
+# CHECK-NEXT: PUSH32r %4, implicit-def %esp, implicit %esp
+# CHECK-NEXT: PUSH32r %5, implicit-def %esp, implicit %esp
+# CHECK-NEXT: PUSH32i8 6, implicit-def %esp, implicit %esp
+# CHECK-NEXT: PUSH32r %2, implicit-def %esp, implicit %esp
+# CHECK-NEXT: PUSH32r %1, implicit-def %esp, implicit %esp
# CHECK-NEXT: CALLpcrel32 @struct, csr_32, implicit %esp, implicit-def %esp
# CHECK-NEXT: ADJCALLSTACKUP32 20, 0, implicit-def dead %esp, implicit-def dead %eflags, implicit %esp
# CHECK-NEXT: RET 0