/// anything was changed.
virtual bool expandPostRAPseudo(MachineInstr &MI) const { return false; }
+ /// Check whether the target can fold a load that feeds a subreg operand
+ /// (or a subreg operand that feeds a store).
+ /// For example, X86 may want to return true if it can fold
+ /// movl (%esp), %eax
+ /// subb %al, ...
+ /// Into:
+ /// subb (%esp), ...
+ ///
+ /// Ideally, we'd like the target implementation of foldMemoryOperand() to
+ /// reject subregs - but since this behavior used to be enforced in the
+ /// target-independent code, moving this responsibility to the targets
+ /// risks silently breaking out-of-tree targets.
+ virtual bool isSubregFoldable() const { return false; }
+
/// Attempt to fold a load or store of the specified stack
/// slot into the specified machine instruction for the specified operand(s).
/// If this is possible, a new instruction is returned with the specified
bool WasCopy = MI->isCopy();
unsigned ImpReg = 0;
- bool SpillSubRegs = (MI->getOpcode() == TargetOpcode::STATEPOINT ||
- MI->getOpcode() == TargetOpcode::PATCHPOINT ||
- MI->getOpcode() == TargetOpcode::STACKMAP);
+ // Spill subregs if the target allows it.
+ // We always want to spill subregs for stackmap/patchpoint pseudos.
+ bool SpillSubRegs = TII.isSubregFoldable() ||
+ MI->getOpcode() == TargetOpcode::STATEPOINT ||
+ MI->getOpcode() == TargetOpcode::PATCHPOINT ||
+ MI->getOpcode() == TargetOpcode::STACKMAP;
// TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
// operands.
ImpReg = MO.getReg();
continue;
}
- // FIXME: Teach targets to deal with subregs.
+
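+    // Only fold subreg operands when the target has declared support via
+    // isSubregFoldable() (or for the stackmap-like pseudos handled above).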
if (!SpillSubRegs && MO.getSubReg())
return false;
// We cannot fold a load instruction into a def.
assert(MBB && "foldMemoryOperand needs an inserted instruction");
MachineFunction &MF = *MBB->getParent();
+ // If we're not folding a load into a subreg, the size of the load is the
+ // size of the spill slot. But if we are, we need to figure out what the
+ // actual load size is.
+ int64_t MemSize = 0;
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ if (Flags & MachineMemOperand::MOStore) {
+ MemSize = MFI.getObjectSize(FI);
+ } else {
+ for (unsigned Idx : Ops) {
+ int64_t OpSize = MFI.getObjectSize(FI);
+
+ if (auto SubReg = MI.getOperand(Idx).getSubReg()) {
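+        // Note: getSubRegIdxSize() reports the subreg width in bits; only
+        // narrow the memory size when that maps to a whole number of bytes.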
+ unsigned SubRegSize = TRI->getSubRegIdxSize(SubReg);
+ if (SubRegSize > 0 && !(SubRegSize % 8))
+ OpSize = SubRegSize / 8;
+ }
+
+ MemSize = std::max(MemSize, OpSize);
+ }
+ }
+
+ assert(MemSize && "Did not expect a zero-sized stack slot");
+
MachineInstr *NewMI = nullptr;
if (MI.getOpcode() == TargetOpcode::STACKMAP ||
assert((!(Flags & MachineMemOperand::MOLoad) ||
NewMI->mayLoad()) &&
"Folded a use to a non-load!");
- const MachineFrameInfo &MFI = MF.getFrameInfo();
assert(MFI.getObjectOffset(FI) != -1);
MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, FI), Flags, MFI.getObjectSize(FI),
+ MachinePointerInfo::getFixedStack(MF, FI), Flags, MemSize,
MFI.getObjectAlignment(FI));
NewMI->addMemOperand(MF, MMO);
const MachineOperand &MO = MI.getOperand(1 - Ops[0]);
MachineBasicBlock::iterator Pos = MI;
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (Flags == MachineMemOperand::MOStore)
storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI);
if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode()))
return nullptr;
+ // Don't fold subreg spills, or reloads that use a high subreg.
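+  // (The slot address corresponds to the low bytes of the spilled value, so
+  // a reload of a high subreg would read from the wrong offset.)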
+ for (auto Op : Ops) {
+ MachineOperand &MO = MI.getOperand(Op);
+ auto SubReg = MO.getSubReg();
+ if (SubReg && (MO.isDef() || SubReg == X86::sub_8bit_hi))
+ return nullptr;
+ }
+
const MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Size = MFI.getObjectSize(FrameIndex);
unsigned Alignment = MFI.getObjectAlignment(FrameIndex);
MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
LiveIntervals *LIS) const {
+
+ // TODO: Support the case where LoadMI loads a wide register, but MI
+ // only uses a subreg.
+ for (auto Op : Ops) {
+ if (MI.getOperand(Op).getSubReg())
+ return nullptr;
+ }
+
// If loading from a FrameIndex, fold directly from the FrameIndex.
unsigned NumOps = LoadMI.getDesc().getNumOperands();
int FrameIndex;
bool expandPostRAPseudo(MachineInstr &MI) const override;
+ /// Check whether the target can fold a load that feeds a subreg operand
+ /// (or a subreg operand that feeds a store).
+ bool isSubregFoldable() const override { return true; }
+
/// foldMemoryOperand - If this target supports it, fold a load or store of
/// the specified stack slot into the specified machine instruction for the
/// specified operand(s). If this is possible, the target should perform the
define fastcc i8 @fold32to8(i32 %add, i8 %spill) {
; CHECK-LABEL: fold32to8:
; CHECK: movl %ecx, (%esp) # 4-byte Spill
-; CHECK: movl (%esp), %eax # 4-byte Reload
-; CHECK: subb %al, %dl
+; CHECK: subb (%esp), %dl # 1-byte Folded Reload
entry:
tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
%trunc = trunc i32 %add to i8
define i32 @fold64to32(i64 %add, i32 %spill) {
; CHECK-LABEL: fold64to32:
; CHECK: movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
-; CHECK: subl %eax, %esi
+; CHECK: subl -{{[0-9]+}}(%rsp), %esi # 4-byte Folded Reload
entry:
tail call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"()
%trunc = trunc i64 %add to i32
define i8 @fold64to8(i64 %add, i8 %spill) {
; CHECK-LABEL: fold64to8:
; CHECK: movq %rdi, -{{[0-9]+}}(%rsp) # 8-byte Spill
-; CHECK: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload
-; CHECK: subb %al, %sil
+; CHECK: subb -{{[0-9]+}}(%rsp), %sil # 1-byte Folded Reload
entry:
tail call void asm sideeffect "", "~{rax},~{rbx},~{rcx},~{rdx},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{dirflag},~{fpsr},~{flags}"()
%trunc = trunc i64 %add to i8
; AVX1-NEXT: orl %ebx, %r14d
; AVX1-NEXT: shlq $32, %r14
; AVX1-NEXT: orq %r15, %r14
-; AVX1-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
-; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
+; AVX1-NEXT: # xmm0 = mem[1,0]
; AVX1-NEXT: callq __truncdfhf2
; AVX1-NEXT: movw %ax, %bx
; AVX1-NEXT: shll $16, %ebx
; AVX2-NEXT: orl %ebx, %r14d
; AVX2-NEXT: shlq $32, %r14
; AVX2-NEXT: orq %r15, %r14
-; AVX2-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload
-; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
+; AVX2-NEXT: # xmm0 = mem[1,0]
; AVX2-NEXT: callq __truncdfhf2
; AVX2-NEXT: movw %ax, %bx
; AVX2-NEXT: shll $16, %ebx
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: callq __truncdfhf2
; AVX1-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX1-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
-; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX1-NEXT: vzeroupper
+; AVX1-NEXT: vpermilpd $1, {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload
+; AVX1-NEXT: # xmm0 = mem[1,0]
; AVX1-NEXT: callq __truncdfhf2
; AVX1-NEXT: movl %eax, %r12d
; AVX1-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: callq __truncdfhf2
; AVX2-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill
-; AVX2-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
-; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: vpermilpd $1, {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload
+; AVX2-NEXT: # xmm0 = mem[1,0]
; AVX2-NEXT: callq __truncdfhf2
; AVX2-NEXT: movl %eax, %r12d
; AVX2-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload