  DenseMap<unsigned, SmallVector<MachineInstr *, 2>> LiveDbgValueMap;
+  /// Has a bit set for every virtual register that has been determined to be
+  /// live across blocks.
+  BitVector MayLiveAcrossBlocks;
+
  /// State of a physical register.
  enum RegState {
    /// A disabled register is not available for allocation, but an alias may
                          unsigned Hint);
  LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg,
                         unsigned Hint);
-  void spillAll(MachineBasicBlock::iterator MI);
+  void spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut);
  bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg);
  int getStackSpaceFor(unsigned VirtReg);
  void reload(MachineBasicBlock::iterator Before, unsigned VirtReg,
              MCPhysReg PhysReg);
+  bool mayLiveOut(unsigned VirtReg);
+
  void dumpState();
};
  return FrameIdx;
}
+/// Returns false if \p VirtReg is known not to live out of the current block.
+bool RegAllocFast::mayLiveOut(unsigned VirtReg) {
+  if (MayLiveAcrossBlocks.test(TargetRegisterInfo::virtReg2Index(VirtReg))) {
+    // Cannot be live-out if there are no successors.
+    return !MBB->succ_empty();
+  }
+
+  // If this block loops back to itself, it would be necessary to check whether
+  // the use comes after the def, so conservatively assume the register may
+  // live out.
+  if (MBB->isSuccessor(MBB))
+    return true;
+
+  // See if the first \p Limit uses of the register are all in the current
+  // block.
+  static const unsigned Limit = 8;
+  unsigned C = 0;
+  for (const MachineInstr &UseInst : MRI->reg_nodbg_instructions(VirtReg)) {
+    if (UseInst.getParent() != MBB || ++C >= Limit) {
+      MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg));
+      // Cannot be live-out if there are no successors.
+      return !MBB->succ_empty();
+    }
+  }
+
+  return false;
+}
+
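Note on the heuristic above: the scan is deliberately bounded. It inspects at most Limit non-debug uses, and as soon as it sees a use outside the current block or hits the limit, it latches the pessimistic answer into MayLiveAcrossBlocks, so repeated queries for the same virtual register return immediately from the cached bit. A minimal standalone sketch of the same bounded-scan pattern, using hypothetical stand-in types rather than the real MachineInstr/MachineBasicBlock:

#include <vector>

// Hypothetical stand-ins for MachineBasicBlock and MachineInstr.
struct Block {};
struct Instr {
  const Block *Parent; // block containing this instruction
};

// Mirrors the bounded scan in mayLiveOut(): inspect at most Limit uses; a use
// in a foreign block, or running into the limit, means the value must be
// treated as potentially live across blocks.
static bool allScannedUsesLocal(const std::vector<Instr> &Uses,
                                const Block *BB) {
  static const unsigned Limit = 8;
  unsigned C = 0;
  for (const Instr &Use : Uses) {
    if (Use.Parent != BB || ++C >= Limit)
      return false; // unknown or non-local: caller assumes may-live-out
  }
  return true;
}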
/// Insert spill instruction for \p AssignedReg before \p Before. Update
/// DBG_VALUEs with \p VirtReg operands with the stack slot.
void RegAllocFast::spill(MachineBasicBlock::iterator Before, unsigned VirtReg,
}
/// Spill all dirty virtregs without killing them.
-void RegAllocFast::spillAll(MachineBasicBlock::iterator MI) {
+void RegAllocFast::spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut) {
  if (LiveVirtRegs.empty())
    return;
  // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
  for (LiveReg &LR : LiveVirtRegs) {
    if (!LR.PhysReg)
      continue;
+    if (OnlyLiveOut && !mayLiveOut(LR.VirtReg))
+      continue;
    spillVirtReg(MI, LR);
  }
  LiveVirtRegs.clear();
  // definitions may be used later on and we do not want to reuse
  // those for virtual registers in between.
  LLVM_DEBUG(dbgs() << " Spilling remaining registers before call.\n");
-  spillAll(MI);
+  spillAll(MI, /*OnlyLiveOut*/ false);
}
  // Third scan.
  // Spill all physical registers holding virtual registers now.
  LLVM_DEBUG(dbgs() << "Spilling live registers at end of block.\n");
-  spillAll(MBB.getFirstTerminator());
+  spillAll(MBB.getFirstTerminator(), /*OnlyLiveOut*/ true);
  // Erase all the coalesced copies. We are delaying it until now because
  // LiveVirtRegs might refer to the instrs.
  unsigned NumVirtRegs = MRI->getNumVirtRegs();
  StackSlotForVirtReg.resize(NumVirtRegs);
  LiveVirtRegs.setUniverse(NumVirtRegs);
+  MayLiveAcrossBlocks.clear();
+  MayLiveAcrossBlocks.resize(NumVirtRegs);
  // Loop over all of the basic blocks, eliminating virtual register references
  for (MachineBasicBlock &MBB : MF)
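The test updates below show the effect under the fast register allocator (-O0, plus the PowerPC fast-isel CHECK-FISL lines): spill stores for values that do not live out of their block are no longer emitted, stack-frame sizes and slot offsets shrink to match, and some kill flags and spill placements shift because the set of end-of-block spills changed (AArch64, AMDGPU, MIPS, ARM, PowerPC, and X86).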
; CHECK-O0:[[BB2]]:
; CHECK-O0: ldr x0, [sp, [[SLOT2]]]
; CHECK-O0: fcmp
-; CHECK-O0: str x0, [sp, [[SLOT3:#[0-9]+]]
+; CHECK-O0: str x0, [sp]
; CHECK-O0: b.le [[BB1]]
; reload from stack
-; CHECK-O0: ldr [[ID3:x[0-9]+]], [sp, [[SLOT3]]]
+; CHECK-O0: ldr [[ID3:x[0-9]+]], [sp]
; CHECK-O0: mov x21, [[ID3]]
; CHECK-O0: ret
entry:
; GCN: bb.1:
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (load 8 from %stack.5, align 4, addrspace 5)
+ ; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5)
; GCN: $vgpr1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
; GCN: renamable $sgpr2 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
; GCN: renamable $sgpr4_sgpr5 = V_CMP_EQ_U32_e64 $sgpr2, killed $vgpr1, implicit $exec
; GCN: renamable $vgpr19 = COPY renamable $vgpr18
; GCN: renamable $sgpr6_sgpr7 = COPY renamable $sgpr4_sgpr5
; GCN: SI_SPILL_S64_SAVE killed $sgpr6_sgpr7, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (store 8 into %stack.5, align 4, addrspace 5)
+ ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.6, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (store 8 into %stack.6, align 4, addrspace 5)
; GCN: SI_SPILL_V32_SAVE killed $vgpr19, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
+ ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5)
; GCN: SI_SPILL_V32_SAVE killed $vgpr18, %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
; GCN: $exec = S_XOR_B64_term $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
; GCN: S_CBRANCH_EXECNZ %bb.1, implicit $exec
; GCN: bb.2:
; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (load 8 from %stack.3, align 4, addrspace 5)
- ; GCN: $exec = S_MOV_B64 killed renamable $sgpr0_sgpr1
+ ; GCN: $exec = S_MOV_B64 renamable $sgpr0_sgpr1
; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5)
; GCN: $sgpr4_sgpr5_sgpr6_sgpr7 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (load 16 from %stack.1, align 4, addrspace 5)
- ; GCN: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1)
- ; GCN: S_ENDPGM
+ ; GCN: BUFFER_STORE_DWORD_OFFSET renamable $vgpr0, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1)
+ ; GCN: S_ENDPGM 0
entry:
%id = call i32 @llvm.amdgcn.workitem.id.x() #1
%index = add i32 %id, 1
; MIPS32O0-NEXT: xor $1, $5, $4
; MIPS32O0-NEXT: sltiu $1, $1, 1
; MIPS32O0-NEXT: move $2, $5
-; MIPS32O0-NEXT: sw $1, 0($sp) # 4-byte Folded Spill
; MIPS32O0-NEXT: addiu $sp, $sp, 8
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
; MIPS32R6O0: # %bb.0: # %entry
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
-; MIPS32R6O0-NEXT: addiu $sp, $sp, -16
+; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
; MIPS32R6O0-NEXT: addu $1, $2, $25
; MIPS32R6O0-NEXT: move $2, $5
; MIPS32R6O0-NEXT: move $3, $4
-; MIPS32R6O0-NEXT: sw $5, 12($sp)
-; MIPS32R6O0-NEXT: lw $5, 12($sp)
+; MIPS32R6O0-NEXT: sw $5, 4($sp)
+; MIPS32R6O0-NEXT: lw $5, 4($sp)
; MIPS32R6O0-NEXT: lw $1, %got(x)($1)
; MIPS32R6O0-NEXT: $BB7_1: # %entry
; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS32R6O0-NEXT: sc $7, 0($1)
; MIPS32R6O0-NEXT: beqzc $7, $BB7_1
; MIPS32R6O0-NEXT: $BB7_3: # %entry
-; MIPS32R6O0-NEXT: sw $2, 8($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: move $2, $6
-; MIPS32R6O0-NEXT: sw $3, 4($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: addiu $sp, $sp, 16
+; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
; MIPS32R6O0-NEXT: jrc $ra
;
; MIPS4-LABEL: AtomicCmpSwap32:
; MIPS32R6O0-NEXT: srlv $7, $7, $1
; MIPS32R6O0-NEXT: seb $7, $7
; MIPS32R6O0-NEXT: # %bb.3: # %entry
-; MIPS32R6O0-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $7, 0($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.4: # %entry
-; MIPS32R6O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload
+; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: seb $2, $1
; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
; MIPS32R6O0-NEXT: jrc $ra
; MIPS32R6O0-NEXT: srlv $7, $7, $1
; MIPS32R6O0-NEXT: seb $7, $7
; MIPS32R6O0-NEXT: # %bb.3: # %entry
-; MIPS32R6O0-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $7, 0($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.4: # %entry
-; MIPS32R6O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload
+; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: seb $2, $1
; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
; MIPS32R6O0-NEXT: jrc $ra
; MIPS32R6O0-NEXT: srlv $7, $7, $1
; MIPS32R6O0-NEXT: seb $7, $7
; MIPS32R6O0-NEXT: # %bb.3: # %entry
-; MIPS32R6O0-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $7, 0($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.4: # %entry
-; MIPS32R6O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload
+; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: seb $2, $1
; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
; MIPS32R6O0-NEXT: jrc $ra
; MIPS32R6O0-NEXT: srlv $7, $7, $1
; MIPS32R6O0-NEXT: seb $7, $7
; MIPS32R6O0-NEXT: # %bb.3: # %entry
-; MIPS32R6O0-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $7, 0($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.4: # %entry
-; MIPS32R6O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload
+; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: seb $2, $1
; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
; MIPS32R6O0-NEXT: jrc $ra
; MIPS32R6O0: # %bb.0: # %entry
; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp)
-; MIPS32R6O0-NEXT: addiu $sp, $sp, -16
+; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
; MIPS32R6O0-NEXT: addu $1, $2, $25
; MIPS32R6O0-NEXT: move $2, $5
; MIPS32R6O0-NEXT: move $3, $4
; MIPS32R6O0-NEXT: srlv $9, $11, $1
; MIPS32R6O0-NEXT: seb $9, $9
; MIPS32R6O0-NEXT: # %bb.4: # %entry
-; MIPS32R6O0-NEXT: sw $2, 12($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $3, 8($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: sw $9, 4($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.5: # %entry
; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
-; MIPS32R6O0-NEXT: addiu $sp, $sp, 16
+; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
; MIPS32R6O0-NEXT: jrc $ra
;
; MIPS4-LABEL: AtomicCmpSwap8:
;
; MIPS32R6O0-LABEL: AtomicCmpSwapRes8:
; MIPS32R6O0: # %bb.0: # %entry
-; MIPS32R6O0-NEXT: addiu $sp, $sp, -24
+; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
; MIPS32R6O0-NEXT: move $1, $6
; MIPS32R6O0-NEXT: move $2, $5
; MIPS32R6O0-NEXT: move $3, $4
; MIPS32R6O0-NEXT: srlv $11, $13, $4
; MIPS32R6O0-NEXT: seb $11, $11
; MIPS32R6O0-NEXT: # %bb.4: # %entry
-; MIPS32R6O0-NEXT: sw $5, 20($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $1, 16($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $2, 12($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $3, 8($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $11, 4($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $11, 0($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.5: # %entry
-; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
-; MIPS32R6O0-NEXT: lw $2, 20($sp) # 4-byte Folded Reload
+; MIPS32R6O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload
+; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: xor $1, $1, $2
; MIPS32R6O0-NEXT: sltiu $2, $1, 1
-; MIPS32R6O0-NEXT: addiu $sp, $sp, 24
+; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
; MIPS32R6O0-NEXT: jrc $ra
;
; MIPS4-LABEL: AtomicCmpSwapRes8:
;
; MIPS64R6O0-LABEL: AtomicCmpSwapRes8:
; MIPS64R6O0: # %bb.0: # %entry
-; MIPS64R6O0-NEXT: daddiu $sp, $sp, -32
+; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
; MIPS64R6O0-NEXT: move $1, $6
; MIPS64R6O0-NEXT: move $2, $5
; MIPS64R6O0-NEXT: move $3, $4
; MIPS64R6O0-NEXT: srlv $11, $13, $7
; MIPS64R6O0-NEXT: seb $11, $11
; MIPS64R6O0-NEXT: # %bb.4: # %entry
-; MIPS64R6O0-NEXT: sw $2, 28($sp) # 4-byte Folded Spill
-; MIPS64R6O0-NEXT: sd $3, 16($sp) # 8-byte Folded Spill
-; MIPS64R6O0-NEXT: sw $11, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $2, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $11, 8($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.5: # %entry
-; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
-; MIPS64R6O0-NEXT: lw $2, 28($sp) # 4-byte Folded Reload
+; MIPS64R6O0-NEXT: lw $1, 8($sp) # 4-byte Folded Reload
+; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: xor $1, $1, $2
; MIPS64R6O0-NEXT: sltiu $2, $1, 1
-; MIPS64R6O0-NEXT: daddiu $sp, $sp, 32
+; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
; MIPS64R6O0-NEXT: jrc $ra
;
; MM32-LABEL: AtomicCmpSwapRes8:
; MIPS32R6O0-NEXT: srlv $7, $7, $1
; MIPS32R6O0-NEXT: seh $7, $7
; MIPS32R6O0-NEXT: # %bb.3: # %entry
-; MIPS32R6O0-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $7, 0($sp) # 4-byte Folded Spill
+; MIPS32R6O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.4: # %entry
-; MIPS32R6O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload
+; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
; MIPS32R6O0-NEXT: seh $2, $1
; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
; MIPS32R6O0-NEXT: jrc $ra
;
; MIPS32R6O0-LABEL: foo:
; MIPS32R6O0: # %bb.0:
-; MIPS32R6O0-NEXT: addiu $sp, $sp, -24
-; MIPS32R6O0-NEXT: .cfi_def_cfa_offset 24
+; MIPS32R6O0-NEXT: addiu $sp, $sp, -8
+; MIPS32R6O0-NEXT: .cfi_def_cfa_offset 8
; MIPS32R6O0-NEXT: move $1, $7
; MIPS32R6O0-NEXT: move $2, $6
; MIPS32R6O0-NEXT: move $3, $5
; MIPS32R6O0-NEXT: srlv $12, $14, $4
; MIPS32R6O0-NEXT: seh $12, $12
; MIPS32R6O0-NEXT: # %bb.4:
-; MIPS32R6O0-NEXT: sw $1, 20($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $2, 16($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $3, 12($sp) # 4-byte Folded Spill
-; MIPS32R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: sw $12, 0($sp) # 4-byte Folded Spill
; MIPS32R6O0-NEXT: # %bb.5:
; MIPS32R6O0-NEXT: sltiu $3, $2, 1
; MIPS32R6O0-NEXT: sync
; MIPS32R6O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload
-; MIPS32R6O0-NEXT: addiu $sp, $sp, 24
+; MIPS32R6O0-NEXT: addiu $sp, $sp, 8
; MIPS32R6O0-NEXT: jrc $ra
;
; MIPS4-LABEL: foo:
; MIPS64R6O0-NEXT: srlv $11, $13, $3
; MIPS64R6O0-NEXT: seh $11, $11
; MIPS64R6O0-NEXT: # %bb.4:
-; MIPS64R6O0-NEXT: sd $5, 8($sp) # 8-byte Folded Spill
-; MIPS64R6O0-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
-; MIPS64R6O0-NEXT: sw $11, 0($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $2, 12($sp) # 4-byte Folded Spill
+; MIPS64R6O0-NEXT: sw $11, 8($sp) # 4-byte Folded Spill
; MIPS64R6O0-NEXT: # %bb.5:
-; MIPS64R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload
+; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: seh $2, $1
-; MIPS64R6O0-NEXT: lw $3, 0($sp) # 4-byte Folded Reload
+; MIPS64R6O0-NEXT: lw $3, 8($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: xor $2, $3, $2
; MIPS64R6O0-NEXT: sltiu $3, $2, 1
; MIPS64R6O0-NEXT: sync
-; MIPS64R6O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload
+; MIPS64R6O0-NEXT: lw $2, 8($sp) # 4-byte Folded Reload
; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
; MIPS64R6O0-NEXT: jrc $ra
;
; MIPS32O0: # %bb.0: # %entry
; MIPS32O0-NEXT: lui $2, %hi(_gp_disp)
; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp)
-; MIPS32O0-NEXT: addiu $sp, $sp, -8
; MIPS32O0-NEXT: addu $1, $2, $25
; MIPS32O0-NEXT: sync
; MIPS32O0-NEXT: lw $1, %got(a)($1)
; MIPS32O0-NEXT: xor $2, $5, $2
; MIPS32O0-NEXT: sltiu $2, $2, 1
; MIPS32O0-NEXT: andi $2, $2, 1
-; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill
-; MIPS32O0-NEXT: addiu $sp, $sp, 8
; MIPS32O0-NEXT: jr $ra
; MIPS32O0-NEXT: nop
;
;
; MIPS64R6O0-LABEL: AtomicCmpSwap64:
; MIPS64R6O0: # %bb.0: # %entry
-; MIPS64R6O0-NEXT: daddiu $sp, $sp, -32
+; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16
; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap64)))
; MIPS64R6O0-NEXT: daddu $1, $1, $25
; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64)))
; MIPS64R6O0-NEXT: move $2, $5
; MIPS64R6O0-NEXT: move $3, $4
-; MIPS64R6O0-NEXT: sd $5, 24($sp)
-; MIPS64R6O0-NEXT: ld $5, 24($sp)
+; MIPS64R6O0-NEXT: sd $5, 8($sp)
+; MIPS64R6O0-NEXT: ld $5, 8($sp)
; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1)
; MIPS64R6O0-NEXT: .LBB7_1: # %entry
; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1
; MIPS64R6O0-NEXT: scd $7, 0($1)
; MIPS64R6O0-NEXT: beqzc $7, .LBB7_1
; MIPS64R6O0-NEXT: .LBB7_3: # %entry
-; MIPS64R6O0-NEXT: sd $2, 16($sp) # 8-byte Folded Spill
+; MIPS64R6O0-NEXT: sd $2, 0($sp) # 8-byte Folded Spill
; MIPS64R6O0-NEXT: move $2, $6
-; MIPS64R6O0-NEXT: sd $3, 8($sp) # 8-byte Folded Spill
-; MIPS64R6O0-NEXT: daddiu $sp, $sp, 32
+; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16
; MIPS64R6O0-NEXT: jrc $ra
;
; O1-LABEL: AtomicCmpSwap64:
define void @foo(i32 %new, i32 %old) {
; O32-LABEL: foo:
; O32: # %bb.0: # %entry
-; O32-NEXT: addiu $sp, $sp, -16
-; O32-NEXT: .cfi_def_cfa_offset 16
; O32-NEXT: move $1, $5
; O32-NEXT: move $2, $4
; O32-NEXT: lui $3, %hi(sym)
; O32-NEXT: nop
; O32-NEXT: $BB0_3: # %entry
; O32-NEXT: sync
-; O32-NEXT: sw $1, 12($sp) # 4-byte Folded Spill
-; O32-NEXT: sw $2, 8($sp) # 4-byte Folded Spill
-; O32-NEXT: sw $6, 4($sp) # 4-byte Folded Spill
-; O32-NEXT: addiu $sp, $sp, 16
; O32-NEXT: jr $ra
; O32-NEXT: nop
;
; N32-LABEL: foo:
; N32: # %bb.0: # %entry
-; N32-NEXT: addiu $sp, $sp, -16
-; N32-NEXT: .cfi_def_cfa_offset 16
; N32-NEXT: move $1, $5
; N32-NEXT: sll $1, $1, 0
; N32-NEXT: move $2, $4
; N32-NEXT: nop
; N32-NEXT: .LBB0_3: # %entry
; N32-NEXT: sync
-; N32-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
-; N32-NEXT: addiu $sp, $sp, 16
; N32-NEXT: jr $ra
; N32-NEXT: nop
;
; N64-LABEL: foo:
; N64: # %bb.0: # %entry
-; N64-NEXT: daddiu $sp, $sp, -16
-; N64-NEXT: .cfi_def_cfa_offset 16
; N64-NEXT: move $1, $5
; N64-NEXT: sll $1, $1, 0
; N64-NEXT: move $2, $4
; N64-NEXT: nop
; N64-NEXT: .LBB0_3: # %entry
; N64-NEXT: sync
-; N64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill
-; N64-NEXT: daddiu $sp, $sp, 16
; N64-NEXT: jr $ra
; N64-NEXT: nop
entry:
; ASM: SWDSP
; ASM: SWDSP
-; ASM: SWDSP
; MM-OBJ: sw ${{[0-9]+}}, {{[0-9]+}}($sp)
; MM-OBJ: sw ${{[0-9]+}}, {{[0-9]+}}($sp)
; MM-OBJ: sw ${{[0-9]+}}, {{[0-9]+}}($sp)
-; MM-OBJ: sw ${{[0-9]+}}, {{[0-9]+}}($sp)
true:
ret <4 x i8> %c
; CHECK-NEXT: mr 4, 10
; CHECK-NEXT: clrldi 4, 4, 32
; CHECK-NEXT: std 4, 0(3)
-; CHECK-NEXT: std 6, -8(1) # 8-byte Folded Spill
; CHECK-NEXT: blr
%1 = load i64, i64* %a, align 8
%conv = zext i64 %1 to i128
; CHECK-FISL: # %bb.0: # %entry
; CHECK-FISL-NEXT: xxlor vs0, v2, v3
; CHECK-FISL-NEXT: xxlnor v2, v2, v3
-; CHECK-FISL-NEXT: li r3, -16
-; CHECK-FISL-NEXT: stxvd2x vs0, r1, r3 # 16-byte Folded Spill
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test14:
; CHECK-FISL-NEXT: xxlor v4, vs0, vs0
; CHECK-FISL-NEXT: xxlnor vs0, v2, v3
; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
-; CHECK-FISL-NEXT: li r3, -16
-; CHECK-FISL-NEXT: stxvd2x v4, r1, r3 # 16-byte Folded Spill
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test15:
; CHECK-FISL-NEXT: xxlor v4, vs0, vs0
; CHECK-FISL-NEXT: xxlnor vs0, v2, v3
; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
-; CHECK-FISL-NEXT: li r3, -16
-; CHECK-FISL-NEXT: stxvd2x v4, r1, r3 # 16-byte Folded Spill
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test16:
; CHECK-FISL-NEXT: xxlor v4, vs0, vs0
; CHECK-FISL-NEXT: xxlandc vs0, v2, v3
; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
-; CHECK-FISL-NEXT: li r3, -16
-; CHECK-FISL-NEXT: stxvd2x v4, r1, r3 # 16-byte Folded Spill
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test18:
; CHECK-FISL-NEXT: xxlor v4, vs0, vs0
; CHECK-FISL-NEXT: xxlandc vs0, v2, v3
; CHECK-FISL-NEXT: xxlor v2, vs0, vs0
-; CHECK-FISL-NEXT: li r3, -16
-; CHECK-FISL-NEXT: stxvd2x v4, r1, r3 # 16-byte Folded Spill
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test19:
; CHECK-FISL-LABEL: test51:
; CHECK-FISL: # %bb.0:
; CHECK-FISL-NEXT: xxspltd v2, v2, 0
-; CHECK-FISL-NEXT: li r3, -16
-; CHECK-FISL-NEXT: stxvd2x v3, r1, r3 # 16-byte Folded Spill
; CHECK-FISL-NEXT: blr
;
; CHECK-LE-LABEL: test51:
# CHECK-NEXT: renamable $r12 = COPY killed renamable $r0
# CHECK-NEXT: t2STRi12 killed $r12, %stack.1, 0, 14, $noreg :: (store 4 into %stack.1)
# CHECK-NEXT: $r8 = t2LDRi12 %stack.1, 0, 14, $noreg :: (load 4 from %stack.1)
-# CHECK-NEXT: INLINEASM &"@ $0", 1, 589833, killed renamable $r8, 12, implicit-def early-clobber $r12
+# CHECK-NEXT: INLINEASM &"@ $0", 1, 589833, renamable $r8, 12, implicit-def early-clobber $r12
# CHECK-NEXT: tBX_RET 14, $noreg
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movl $0, (%rdi)
; CHECK-O0-NEXT: movl $0, 4(%rdi)
-; CHECK-O0-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-O0-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: widen_zero_init:
; CHECK-O0: # %bb.0:
; CHECK-O0-NEXT: movl $0, (%rdi)
; CHECK-O0-NEXT: movl $0, 4(%rdi)
-; CHECK-O0-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-O0-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: widen_zero_init_unaligned:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: addq $15, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
-; CHECK-O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_add1:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: addq $-15, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
-; CHECK-O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_sub1:
; CHECK-O0-NEXT: leaq (%rax,%rax,4), %rax
; CHECK-O0-NEXT: leaq (%rax,%rax,2), %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
-; CHECK-O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_mul1:
; CHECK-O0-NEXT: sarq $3, %rdx
; CHECK-O0-NEXT: addq %rcx, %rdx
; CHECK-O0-NEXT: movq %rdx, (%rdi)
-; CHECK-O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_sdiv1:
; CHECK-O0-NEXT: mulq %rcx
; CHECK-O0-NEXT: shrq $3, %rdx
; CHECK-O0-NEXT: movq %rdx, (%rdi)
-; CHECK-O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_udiv1:
; CHECK-O0-NEXT: leaq (%rcx,%rcx,2), %rcx
; CHECK-O0-NEXT: subq %rcx, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
-; CHECK-O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_srem1:
; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; CHECK-O0-NEXT: subq %rax, %rcx
; CHECK-O0-NEXT: movq %rcx, (%rdi)
-; CHECK-O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_urem1:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: shlq $15, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
-; CHECK-O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_shl1:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: shrq $15, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
-; CHECK-O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_lshr1:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: sarq $15, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
-; CHECK-O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_ashr1:
; CHECK-O0-NEXT: andl $15, %ecx
; CHECK-O0-NEXT: movl %ecx, %eax
; CHECK-O0-NEXT: movq %rax, (%rdi)
-; CHECK-O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_and1:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: orq $15, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
-; CHECK-O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_or1:
; CHECK-O0-NEXT: movq (%rdi), %rax
; CHECK-O0-NEXT: xorq $15, %rax
; CHECK-O0-NEXT: movq %rax, (%rdi)
-; CHECK-O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-O0-NEXT: retq
;
; CHECK-O3-LABEL: rmw_fold_xor1:
; X86-NOCMOV: # %bb.0:
; X86-NOCMOV-NEXT: pushl %ebx
; X86-NOCMOV-NEXT: pushl %esi
-; X86-NOCMOV-NEXT: subl $24, %esp
+; X86-NOCMOV-NEXT: subl $20, %esp
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: movl sc32, %ecx
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: subl %edx, %ecx
; X86-NOCMOV-NEXT: movl %eax, %esi
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jge .LBB6_4
; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start
; X86-NOCMOV-NEXT: jne .LBB6_2
; X86-NOCMOV-NEXT: jmp .LBB6_1
; X86-NOCMOV-NEXT: .LBB6_2: # %atomicrmw.end
-; X86-NOCMOV-NEXT: addl $24, %esp
+; X86-NOCMOV-NEXT: addl $20, %esp
; X86-NOCMOV-NEXT: popl %esi
; X86-NOCMOV-NEXT: popl %ebx
; X86-NOCMOV-NEXT: retl
; X86-NOCMOV: # %bb.0:
; X86-NOCMOV-NEXT: pushl %ebx
; X86-NOCMOV-NEXT: pushl %esi
-; X86-NOCMOV-NEXT: subl $24, %esp
+; X86-NOCMOV-NEXT: subl $20, %esp
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: movl sc32, %ecx
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: subl %edx, %ecx
; X86-NOCMOV-NEXT: movl %eax, %esi
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jle .LBB7_4
; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start
; X86-NOCMOV-NEXT: jne .LBB7_2
; X86-NOCMOV-NEXT: jmp .LBB7_1
; X86-NOCMOV-NEXT: .LBB7_2: # %atomicrmw.end
-; X86-NOCMOV-NEXT: addl $24, %esp
+; X86-NOCMOV-NEXT: addl $20, %esp
; X86-NOCMOV-NEXT: popl %esi
; X86-NOCMOV-NEXT: popl %ebx
; X86-NOCMOV-NEXT: retl
; X86-NOCMOV: # %bb.0:
; X86-NOCMOV-NEXT: pushl %ebx
; X86-NOCMOV-NEXT: pushl %esi
-; X86-NOCMOV-NEXT: subl $24, %esp
+; X86-NOCMOV-NEXT: subl $20, %esp
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: movl sc32, %ecx
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: subl %edx, %ecx
; X86-NOCMOV-NEXT: movl %eax, %esi
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: ja .LBB8_4
; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start
; X86-NOCMOV-NEXT: jne .LBB8_2
; X86-NOCMOV-NEXT: jmp .LBB8_1
; X86-NOCMOV-NEXT: .LBB8_2: # %atomicrmw.end
-; X86-NOCMOV-NEXT: addl $24, %esp
+; X86-NOCMOV-NEXT: addl $20, %esp
; X86-NOCMOV-NEXT: popl %esi
; X86-NOCMOV-NEXT: popl %ebx
; X86-NOCMOV-NEXT: retl
; X86-NOCMOV: # %bb.0:
; X86-NOCMOV-NEXT: pushl %ebx
; X86-NOCMOV-NEXT: pushl %esi
-; X86-NOCMOV-NEXT: subl $24, %esp
+; X86-NOCMOV-NEXT: subl $20, %esp
; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NOCMOV-NEXT: movl sc32, %ecx
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: subl %edx, %ecx
; X86-NOCMOV-NEXT: movl %eax, %esi
; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NOCMOV-NEXT: jbe .LBB9_4
; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start
; X86-NOCMOV-NEXT: jne .LBB9_2
; X86-NOCMOV-NEXT: jmp .LBB9_1
; X86-NOCMOV-NEXT: .LBB9_2: # %atomicrmw.end
-; X86-NOCMOV-NEXT: addl $24, %esp
+; X86-NOCMOV-NEXT: addl $20, %esp
; X86-NOCMOV-NEXT: popl %esi
; X86-NOCMOV-NEXT: popl %ebx
; X86-NOCMOV-NEXT: retl
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: movl $1, %ecx
; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip)
-; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: retq
;
; X86-LABEL: atomic_fetch_cmpxchg32:
; X86: # %bb.0:
-; X86-NEXT: pushl %eax
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: movl $1, %ecx
; X86-NEXT: lock cmpxchgl %ecx, sc32
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: popl %eax
; X86-NEXT: retl
%t1 = cmpxchg i32* @sc32, i32 0, i32 1 acquire acquire
ret void
; X64-LABEL: atomic_fetch_swap32:
; X64: # %bb.0:
; X64-NEXT: xchgl %edi, {{.*}}(%rip)
-; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: retq
;
; X86-LABEL: atomic_fetch_swap32:
; X86: # %bb.0:
-; X86-NEXT: pushl %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: xchgl %eax, sc32
-; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NEXT: popl %eax
; X86-NEXT: retl
%t1 = atomicrmw xchg i32* @sc32, i32 %x acquire
ret void
; X64: # %bb.0:
; X64-NEXT: movd %xmm0, %eax
; X64-NEXT: xchgl %eax, {{.*}}(%rip)
-; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; X64-NEXT: retq
;
; X86-CMOV-LABEL: atomic_fetch_swapf32:
; X86-CMOV: # %bb.0:
-; X86-CMOV-NEXT: pushl %eax
; X86-CMOV-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-CMOV-NEXT: movd %xmm0, %eax
; X86-CMOV-NEXT: xchgl %eax, fsc32
-; X86-CMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-CMOV-NEXT: popl %eax
; X86-CMOV-NEXT: retl
;
; X86-NOCMOV-LABEL: atomic_fetch_swapf32:
; X86-NOCMOV: # %bb.0:
-; X86-NOCMOV-NEXT: subl $8, %esp
+; X86-NOCMOV-NEXT: pushl %eax
; X86-NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
-; X86-NOCMOV-NEXT: fstps {{[0-9]+}}(%esp)
-; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NOCMOV-NEXT: fstps (%esp)
+; X86-NOCMOV-NEXT: movl (%esp), %eax
; X86-NOCMOV-NEXT: xchgl %eax, fsc32
-; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill
-; X86-NOCMOV-NEXT: addl $8, %esp
+; X86-NOCMOV-NEXT: popl %eax
; X86-NOCMOV-NEXT: retl
%t1 = atomicrmw xchg float* @fsc32, float %x acquire
ret void
; I486-LABEL: atomic_fetch_add64:
; I486: # %bb.0: # %entry
; I486-NEXT: pushl %esi
-; I486-NEXT: subl $56, %esp
+; I486-NEXT: subl $48, %esp
; I486-NEXT: leal sc64, %eax
; I486-NEXT: movl %esp, %ecx
; I486-NEXT: movl $2, 12(%ecx)
; I486-NEXT: movl $sc64, (%esi)
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_fetch_add_8
-; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: addl $56, %esp
+; I486-NEXT: addl $48, %esp
; I486-NEXT: popl %esi
; I486-NEXT: retl
entry:
; I486-LABEL: atomic_fetch_sub64:
; I486: # %bb.0:
; I486-NEXT: pushl %esi
-; I486-NEXT: subl $56, %esp
+; I486-NEXT: subl $48, %esp
; I486-NEXT: leal sc64, %eax
; I486-NEXT: movl %esp, %ecx
; I486-NEXT: movl $2, 12(%ecx)
; I486-NEXT: movl $sc64, (%esi)
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_fetch_sub_8
-; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: addl $56, %esp
+; I486-NEXT: addl $48, %esp
; I486-NEXT: popl %esi
; I486-NEXT: retl
%t1 = atomicrmw sub i64* @sc64, i64 1 acquire
; I486-LABEL: atomic_fetch_and64:
; I486: # %bb.0:
; I486-NEXT: pushl %esi
-; I486-NEXT: subl $44, %esp
+; I486-NEXT: subl $36, %esp
; I486-NEXT: leal sc64, %eax
; I486-NEXT: movl %esp, %ecx
; I486-NEXT: movl $2, 12(%ecx)
; I486-NEXT: movl $sc64, (%esi)
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_fetch_and_8
-; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: addl $44, %esp
+; I486-NEXT: addl $36, %esp
; I486-NEXT: popl %esi
; I486-NEXT: retl
%t1 = atomicrmw and i64* @sc64, i64 3 acquire
; I486-LABEL: atomic_fetch_or64:
; I486: # %bb.0:
; I486-NEXT: pushl %esi
-; I486-NEXT: subl $44, %esp
+; I486-NEXT: subl $36, %esp
; I486-NEXT: leal sc64, %eax
; I486-NEXT: movl %esp, %ecx
; I486-NEXT: movl $2, 12(%ecx)
; I486-NEXT: movl $sc64, (%esi)
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_fetch_or_8
-; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: addl $44, %esp
+; I486-NEXT: addl $36, %esp
; I486-NEXT: popl %esi
; I486-NEXT: retl
%t1 = atomicrmw or i64* @sc64, i64 3 acquire
; I486-LABEL: atomic_fetch_xor64:
; I486: # %bb.0:
; I486-NEXT: pushl %esi
-; I486-NEXT: subl $44, %esp
+; I486-NEXT: subl $36, %esp
; I486-NEXT: leal sc64, %eax
; I486-NEXT: movl %esp, %ecx
; I486-NEXT: movl $2, 12(%ecx)
; I486-NEXT: movl $sc64, (%esi)
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_fetch_xor_8
-; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: addl $44, %esp
+; I486-NEXT: addl $36, %esp
; I486-NEXT: popl %esi
; I486-NEXT: retl
%t1 = atomicrmw xor i64* @sc64, i64 3 acquire
; I486-LABEL: atomic_fetch_nand64:
; I486: # %bb.0:
; I486-NEXT: pushl %esi
-; I486-NEXT: subl $28, %esp
+; I486-NEXT: subl $20, %esp
; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
; I486-NEXT: leal sc64, %edx
; I486-NEXT: movl $sc64, (%esi)
; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_fetch_nand_8
-; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: addl $28, %esp
+; I486-NEXT: addl $20, %esp
; I486-NEXT: popl %esi
; I486-NEXT: retl
%t1 = atomicrmw nand i64* @sc64, i64 %x acquire
; I486-NEXT: pushl %edi
; I486-NEXT: pushl %esi
; I486-NEXT: andl $-8, %esp
-; I486-NEXT: subl $80, %esp
+; I486-NEXT: subl $72, %esp
; I486-NEXT: movl 12(%ebp), %eax
; I486-NEXT: movl 8(%ebp), %ecx
; I486-NEXT: movl sc64+4, %edx
; I486-NEXT: movl %eax, %ebx
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: jge .LBB6_4
; I486-NEXT: pushl %edi
; I486-NEXT: pushl %esi
; I486-NEXT: andl $-8, %esp
-; I486-NEXT: subl $80, %esp
+; I486-NEXT: subl $72, %esp
; I486-NEXT: movl 12(%ebp), %eax
; I486-NEXT: movl 8(%ebp), %ecx
; I486-NEXT: movl sc64+4, %edx
; I486-NEXT: movl %eax, %ebx
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: jge .LBB7_4
; I486-NEXT: pushl %edi
; I486-NEXT: pushl %esi
; I486-NEXT: andl $-8, %esp
-; I486-NEXT: subl $80, %esp
+; I486-NEXT: subl $72, %esp
; I486-NEXT: movl 12(%ebp), %eax
; I486-NEXT: movl 8(%ebp), %ecx
; I486-NEXT: movl sc64+4, %edx
; I486-NEXT: movl %eax, %ebx
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: jb .LBB8_4
; I486-NEXT: pushl %edi
; I486-NEXT: pushl %esi
; I486-NEXT: andl $-8, %esp
-; I486-NEXT: subl $80, %esp
+; I486-NEXT: subl $72, %esp
; I486-NEXT: movl 12(%ebp), %eax
; I486-NEXT: movl 8(%ebp), %ecx
; I486-NEXT: movl sc64+4, %edx
; I486-NEXT: movl %eax, %ebx
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: jae .LBB9_4
; X64-NEXT: # kill: def $rax killed $eax
; X64-NEXT: movl $1, %ecx
; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip)
-; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: retq
;
; I486-LABEL: atomic_fetch_cmpxchg64:
; I486-NEXT: movl $sc64, (%edx)
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_compare_exchange_8
-; I486-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; I486-NEXT: movl %ebp, %esp
; I486-NEXT: popl %ebp
; I486-NEXT: retl
; X64-LABEL: atomic_fetch_swap64:
; X64: # %bb.0:
; X64-NEXT: xchgq %rdi, {{.*}}(%rip)
-; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: retq
;
; I486-LABEL: atomic_fetch_swap64:
; I486: # %bb.0:
; I486-NEXT: pushl %esi
-; I486-NEXT: subl $28, %esp
+; I486-NEXT: subl $20, %esp
; I486-NEXT: movl {{[0-9]+}}(%esp), %eax
; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx
; I486-NEXT: leal sc64, %edx
; I486-NEXT: movl $sc64, (%esi)
; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_exchange_8
-; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: addl $28, %esp
+; I486-NEXT: addl $20, %esp
; I486-NEXT: popl %esi
; I486-NEXT: retl
%t1 = atomicrmw xchg i64* @sc64, i64 %x acquire
; X64: # %bb.0:
; X64-NEXT: movq %xmm0, %rax
; X64-NEXT: xchgq %rax, {{.*}}(%rip)
-; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; X64-NEXT: retq
;
; I486-LABEL: atomic_fetch_swapf64:
; I486-NEXT: movl %esp, %ebp
; I486-NEXT: pushl %esi
; I486-NEXT: andl $-8, %esp
-; I486-NEXT: subl $48, %esp
+; I486-NEXT: subl $40, %esp
; I486-NEXT: fldl 8(%ebp)
; I486-NEXT: leal fsc64, %eax
; I486-NEXT: fstpl {{[0-9]+}}(%esp)
; I486-NEXT: movl $fsc64, (%esi)
; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: calll __atomic_exchange_8
-; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; I486-NEXT: leal -4(%ebp), %esp
; I486-NEXT: popl %esi
; I486-NEXT: popl %ebp
; X32-LABEL: atomic_fetch_cmpxchg64:
; X32: # %bb.0:
; X32-NEXT: pushl %ebx
-; X32-NEXT: subl $12, %esp
+; X32-NEXT: pushl %eax
; X32-NEXT: xorl %eax, %eax
; X32-NEXT: movl $1, %ebx
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X32-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X32-NEXT: movl (%esp), %edx # 4-byte Reload
+; X32-NEXT: movl (%esp), %ecx # 4-byte Reload
; X32-NEXT: lock cmpxchg8b sc64
-; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
-; X32-NEXT: addl $12, %esp
+; X32-NEXT: addl $4, %esp
; X32-NEXT: popl %ebx
; X32-NEXT: retl
%t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire acquire
define void @test_xmm(i32 %shift, i32 %mulp, <2 x i64> %a,i8* %arraydecay,i8* %fname){
; CHECK-LABEL: test_xmm:
; CHECK: ## %bb.0:
-; CHECK-NEXT: subq $72, %rsp
-; CHECK-NEXT: .cfi_def_cfa_offset 80
+; CHECK-NEXT: subq $56, %rsp
+; CHECK-NEXT: .cfi_def_cfa_offset 64
; CHECK-NEXT: vpmovw2m %xmm0, %k0
; CHECK-NEXT: movl $2, %esi
; CHECK-NEXT: movl $8, %eax
; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx ## 4-byte Reload
; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
; CHECK-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
-; CHECK-NEXT: movw %r8w, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
+; CHECK-NEXT: movw %r8w, (%rsp) ## 2-byte Spill
; CHECK-NEXT: callq _calc_expected_mask_val
; CHECK-NEXT: movw %ax, %r8w
-; CHECK-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %r10w ## 2-byte Reload
+; CHECK-NEXT: movw (%rsp), %r10w ## 2-byte Reload
; CHECK-NEXT: movzwl %r10w, %edi
; CHECK-NEXT: movzwl %r8w, %esi
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload
; CHECK-NEXT: callq _check_mask16
-; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill
-; CHECK-NEXT: addq $72, %rsp
+; CHECK-NEXT: addq $56, %rsp
; CHECK-NEXT: retq
%d2 = bitcast <2 x i64> %a to <8 x i16>
%m2 = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %d2)
; CHECK-NEXT: #APP
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: movq %rdx, %rax
-; CHECK-NEXT: movq %rdx, -8(%rsp)
; CHECK-NEXT: ret
define i64 @foo() {
; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm24, %zmm24
; CHECK-NEXT: vmovaps %zmm24, {{[0-9]+}}(%rsp)
; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm0
-; CHECK-NEXT: vmovss %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: vmovss %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: vmovss %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: vmovss %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: vmovss %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: vmovss %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: vmovss %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
-; CHECK-NEXT: vmovss %xmm15, (%rsp) # 4-byte Spill
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; 686-O0-NEXT: .cfi_def_cfa_offset 16
; 686-O0-NEXT: pushl %esi
; 686-O0-NEXT: .cfi_def_cfa_offset 20
-; 686-O0-NEXT: subl $24, %esp
-; 686-O0-NEXT: .cfi_def_cfa_offset 44
+; 686-O0-NEXT: subl $1, %esp
+; 686-O0-NEXT: .cfi_def_cfa_offset 21
; 686-O0-NEXT: .cfi_offset %esi, -20
; 686-O0-NEXT: .cfi_offset %edi, -16
; 686-O0-NEXT: .cfi_offset %ebx, -12
; 686-O0-NEXT: xorl $208307499, %eax # imm = 0xC6A852B
; 686-O0-NEXT: xorl $-2, %ecx
; 686-O0-NEXT: orl %ecx, %eax
-; 686-O0-NEXT: setne {{[0-9]+}}(%esp)
+; 686-O0-NEXT: setne (%esp)
; 686-O0-NEXT: movl var_5, %ecx
; 686-O0-NEXT: movl %ecx, %edx
; 686-O0-NEXT: sarl $31, %edx
; 686-O0-NEXT: movzbl %bl, %ebp
; 686-O0-NEXT: movl %ebp, _ZN8struct_210member_2_0E
; 686-O0-NEXT: movl $0, _ZN8struct_210member_2_0E+4
-; 686-O0-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; 686-O0-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; 686-O0-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; 686-O0-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; 686-O0-NEXT: movl %edi, (%esp) # 4-byte Spill
-; 686-O0-NEXT: addl $24, %esp
+; 686-O0-NEXT: addl $1, %esp
; 686-O0-NEXT: .cfi_def_cfa_offset 20
; 686-O0-NEXT: popl %esi
; 686-O0-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
; CHECK-NEXT: # implicit-def: $rax
; CHECK-NEXT: movdqu %xmm1, (%rax)
-; CHECK-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) # 16-byte Spill
; CHECK-NEXT: .LBB0_2:
; CHECK-NEXT: retq
indirectbr i8* undef, [label %9, label %1]
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-32, %rsp
-; CHECK-NEXT: subq $320, %rsp # imm = 0x140
+; CHECK-NEXT: subq $128, %rsp
; CHECK-NEXT: vmovaps 240(%rbp), %ymm8
; CHECK-NEXT: vmovaps 208(%rbp), %ymm9
; CHECK-NEXT: vmovaps 176(%rbp), %ymm10
; CHECK-NEXT: vpblendd {{.*#+}} ymm5 = ymm7[0,1,2,3,4,5],ymm5[6,7]
; CHECK-NEXT: vmovaps %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm5, %ymm1
-; CHECK-NEXT: vmovaps %ymm3, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
+; CHECK-NEXT: vmovaps %ymm3, (%rsp) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm9, %ymm3
-; CHECK-NEXT: vmovaps %ymm10, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT: vmovaps %ymm12, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT: vmovaps %ymm13, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT: vmovaps %ymm14, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT: vmovaps %ymm15, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
-; CHECK-NEXT: vmovaps %ymm4, (%rsp) # 32-byte Spill
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-512, %rsp # imm = 0xFE00
-; CHECK-NEXT: subq $2048, %rsp # imm = 0x800
+; CHECK-NEXT: subq $1536, %rsp # imm = 0x600
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi
; CHECK-NEXT: callq test
; CHECK-NEXT: vmovupd {{[0-9]+}}(%rsp), %xmm0
; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
; CHECK-NEXT: # xmm0 = mem[0],zero
-; CHECK-NEXT: vmovsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm5, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm7, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm16, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm17, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm18, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm19, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm20, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm21, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm22, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm23, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm24, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm25, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm26, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm27, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm4, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm6, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm28, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm29, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm30, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm31, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-APPLE: retq
; CHECK-O0-LABEL: conditionally_forward_swifterror:
-; CHECK-O0: subq $24, %rsp
-; CHECK-O0: movq %r12, [[REG1:%[a-z0-9]+]]
+; CHECK-O0: pushq [[REG1:%[a-z0-9]+]]
+; CHECK-O0: movq %r12, [[REG1]]
; CHECK-O0: cmpl $0, %edi
-; CHECK-O0-DAG: movq [[REG1]], [[STK:[0-9]+]](%rsp)
-; CHECK-O0-DAG: movq %r12, [[STK2:[0-9]+]](%rsp)
+; CHECK-O0-DAG: movq %r12, (%rsp)
; CHECK-O0: je
-; CHECK-O0: movq [[STK2]](%rsp), [[REG:%[a-z0-9]+]]
+; CHECK-O0: movq (%rsp), [[REG:%[a-z0-9]+]]
; CHECK-O0: movq [[REG]], %r12
; CHECK-O0: callq _moo
-; CHECK-O0: addq $24, %rsp
+; CHECK-O0: popq [[REG1]]
; CHECK-O0: retq
-; CHECK-O0: movq [[STK2]](%rsp), [[REG:%[a-z0-9]+]]
+; CHECK-O0: movq (%rsp), [[REG:%[a-z0-9]+]]
; CHECK-O0: xorps %xmm0, %xmm0
; CHECK-O0: movq [[REG]], %r12
-; CHECK-O0: addq $24, %rsp
+; CHECK-O0: popq [[REG1]]
; CHECK-O0: retq
entry:
%cond = icmp ne i32 %cc, 0
; CHECK: [0x0000000000000000, 0x[[LTMP3:.*]]): DW_OP_reg5 RDI, DW_OP_piece 0x8, DW_OP_reg4 RSI, DW_OP_piece 0x4
; 0x0000000000000006 - 0x0000000000000008: rbp-8, piece 0x8, rax, piece 0x4 )
-; CHECK: [0x[[LTMP3]], {{.*}}): DW_OP_breg6 RBP-8, DW_OP_piece 0x8, DW_OP_reg4 RSI, DW_OP_piece 0x4
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"