AArch64Subtarget &, AArch64RegisterBankInfo &);
FunctionPass *createAArch64PreLegalizeCombiner();
FunctionPass *createAArch64StackTaggingPass(bool MergeInit);
+FunctionPass *createAArch64StackTaggingPreRAPass();
void initializeAArch64A53Fix835769Pass(PassRegistry&);
void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
void initializeFalkorMarkStridedAccessesLegacyPass(PassRegistry&);
void initializeLDTLSCleanupPass(PassRegistry&);
void initializeAArch64StackTaggingPass(PassRegistry&);
+void initializeAArch64StackTaggingPreRAPass(PassRegistry&);
} // end namespace llvm
#endif
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const AArch64InstrInfo *TII =
MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
const AArch64FrameLowering *TFI = getFrameLowering(MF);
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
+ bool Tagged =
+ MI.getOperand(FIOperandNum).getTargetFlags() & AArch64II::MO_TAGGED;
unsigned FrameReg;
// Special handling of dbg_value, stackmap and patchpoint instructions.
StackOffset Offset;
if (MI.getOpcode() == AArch64::TAGPstack) {
// TAGPstack must use the virtual frame register in its 3rd operand.
- const MachineFrameInfo &MFI = MF.getFrameInfo();
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
FrameReg = MI.getOperand(3).getReg();
Offset = {MFI.getObjectOffset(FrameIndex) +
AFI->getTaggedBasePointerOffset(),
MVT::i8};
+ } else if (Tagged) {
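+    // A load or store that uses SP as the base register with an immediate
+    // offset is not tag checked, so a tagged slot can be accessed directly
+    // off SP without materializing its tagged address, as long as the slot
+    // is within immediate range.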
+ StackOffset SPOffset = {
+ MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize(), MVT::i8};
+ if (MFI.hasVarSizedObjects() ||
+ isAArch64FrameOffsetLegal(MI, SPOffset, nullptr, nullptr, nullptr) !=
+ (AArch64FrameOffsetCanUpdate | AArch64FrameOffsetIsLegal)) {
+ // Can't update to SP + offset in place. Precalculate the tagged pointer
+ // in a scratch register.
+ Offset = TFI->resolveFrameIndexReference(
+ MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
+ Register ScratchReg =
+ MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
+ emitFrameOffset(MBB, II, MI.getDebugLoc(), ScratchReg, FrameReg, Offset,
+ TII);
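+      // LDG reads the allocation tag for the slot's address and merges it
+      // into the tag bits of ScratchReg, yielding a correctly tagged pointer.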
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(AArch64::LDG), ScratchReg)
+ .addReg(ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(0);
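+      // Point the memory operand at the tagged pointer instead of the frame
+      // index; the trailing 'true' marks ScratchReg as killed at this use.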
+ MI.getOperand(FIOperandNum)
+ .ChangeToRegister(ScratchReg, false, false, true);
+ return;
+ }
+ FrameReg = AArch64::SP;
+ Offset = {MFI.getObjectOffset(FrameIndex) + (int64_t)MFI.getStackSize(),
+ MVT::i8};
} else {
Offset = TFI->resolveFrameIndexReference(
MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
--- /dev/null
+//===-- AArch64StackTaggingPreRA.cpp --- Stack Tagging for AArch64 -----===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64.h"
+#include "AArch64MachineFunctionInfo.h"
+#include "AArch64InstrInfo.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineTraceMetrics.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetRegisterInfo.h"
+#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-stack-tagging-pre-ra"
+
+enum UncheckedLdStMode { UncheckedNever, UncheckedSafe, UncheckedAlways };
+
+static cl::opt<UncheckedLdStMode> ClUncheckedLdSt(
+    "stack-tagging-unchecked-ld-st", cl::Hidden,
+    cl::init(UncheckedSafe),
+    cl::desc("When to apply the unchecked-ld-st optimization to loads and "
+             "stores of tagged stack slots."),
+ cl::values(
+ clEnumValN(UncheckedNever, "never", "never apply unchecked-ld-st"),
+ clEnumValN(
+ UncheckedSafe, "safe",
+ "apply unchecked-ld-st when the target is definitely within range"),
+ clEnumValN(UncheckedAlways, "always", "always apply unchecked-ld-st")));
+
+namespace {
+
+class AArch64StackTaggingPreRA : public MachineFunctionPass {
+ MachineFunction *MF;
+ AArch64FunctionInfo *AFI;
+ MachineFrameInfo *MFI;
+ MachineRegisterInfo *MRI;
+ const AArch64RegisterInfo *TRI;
+ const AArch64InstrInfo *TII;
+
+ SmallVector<MachineInstr*, 16> ReTags;
+
+public:
+ static char ID;
+ AArch64StackTaggingPreRA() : MachineFunctionPass(ID) {
+ initializeAArch64StackTaggingPreRAPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool mayUseUncheckedLoadStore();
+ void uncheckUsesOf(unsigned TaggedReg, int FI);
+ void uncheckLoadsAndStores();
+
+ bool runOnMachineFunction(MachineFunction &Func) override;
+ StringRef getPassName() const override {
+ return "AArch64 Stack Tagging PreRA";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+} // end anonymous namespace
+
+char AArch64StackTaggingPreRA::ID = 0;
+
+INITIALIZE_PASS_BEGIN(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
+ "AArch64 Stack Tagging PreRA Pass", false, false)
+INITIALIZE_PASS_END(AArch64StackTaggingPreRA, "aarch64-stack-tagging-pre-ra",
+ "AArch64 Stack Tagging PreRA Pass", false, false)
+
+FunctionPass *llvm::createAArch64StackTaggingPreRAPass() {
+ return new AArch64StackTaggingPreRA();
+}
+
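+// Returns true for the load/store opcodes that this pass may rewrite to
+// address their slot directly off SP; only the scaled unsigned-immediate
+// addressing forms are handled.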
+static bool isUncheckedLoadOrStoreOpcode(unsigned Opcode) {
+ switch (Opcode) {
+ case AArch64::LDRWui:
+ case AArch64::LDRSHWui:
+ case AArch64::LDRXui:
+ case AArch64::LDRBui:
+ case AArch64::LDRBBui:
+ case AArch64::LDRHui:
+ case AArch64::LDRSui:
+ case AArch64::LDRDui:
+ case AArch64::LDRQui:
+ case AArch64::STRWui:
+ case AArch64::STRXui:
+ case AArch64::STRBui:
+ case AArch64::STRBBui:
+ case AArch64::STRHui:
+ case AArch64::STRSui:
+ case AArch64::STRDui:
+ case AArch64::STRQui:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool AArch64StackTaggingPreRA::mayUseUncheckedLoadStore() {
+ if (ClUncheckedLdSt == UncheckedNever)
+ return false;
+  if (ClUncheckedLdSt == UncheckedAlways)
+ return true;
+
+  // This estimate could be improved if we had stronger guarantees about stack
+  // frame layout. With LocalStackAllocation we could estimate the SP offset of
+  // any preallocated slot. AArch64FrameLowering::orderFrameObjects could put
+  // tagged objects ahead of non-tagged ones, but that's not always desirable.
+  //
+  // Underestimating the SP offset here may force eliminateFrameIndex to use
+  // LDG to materialize the tagged address of the stack slot, along with a
+  // scratch register allocation (post-regalloc!).
+  //
+  // For now we do the safe thing and require that the entire stack frame is
+  // within range of the unchecked instruction with the smallest offset range.
+ unsigned FrameSize = 0;
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i)
+ FrameSize += MFI->getObjectSize(i);
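+  // 0xf00 (3840) leaves some slack below the 4095-byte offset limit of the
+  // byte-sized unchecked forms (ldrb/strb with an unsigned immediate), which
+  // have the smallest reach of the opcodes handled by this pass.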
+ bool EntireFrameReachableFromSP = FrameSize < 0xf00;
+ return !MFI->hasVarSizedObjects() && EntireFrameReachableFromSP;
+}
+
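+// Rewrite all eligible loads and stores that use TaggedReg (the tagged
+// address produced by TAGPstack for frame index FI) to reference FI directly
+// with the MO_TAGGED flag. eliminateFrameIndex later lowers these either to
+// SP-relative (tag-unchecked) accesses or, if out of range, back to an
+// LDG-based tagged pointer. Copies of the tagged pointer are followed
+// recursively.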
+void AArch64StackTaggingPreRA::uncheckUsesOf(unsigned TaggedReg, int FI) {
+ for (auto UI = MRI->use_instr_begin(TaggedReg), E = MRI->use_instr_end();
+ UI != E;) {
+ MachineInstr *UseI = &*(UI++);
+ if (isUncheckedLoadOrStoreOpcode(UseI->getOpcode())) {
+ // FI operand is always the one before the immediate offset.
+ unsigned OpIdx = TII->getLoadStoreImmIdx(UseI->getOpcode()) - 1;
+ if (UseI->getOperand(OpIdx).isReg() &&
+ UseI->getOperand(OpIdx).getReg() == TaggedReg) {
+ UseI->getOperand(OpIdx).ChangeToFrameIndex(FI);
+ UseI->getOperand(OpIdx).setTargetFlags(AArch64II::MO_TAGGED);
+ }
+ } else if (UseI->isCopy() &&
+ Register::isVirtualRegister(UseI->getOperand(0).getReg())) {
+ uncheckUsesOf(UseI->getOperand(0).getReg(), FI);
+ }
+ }
+}
+
+void AArch64StackTaggingPreRA::uncheckLoadsAndStores() {
+ for (auto *I : ReTags) {
+ unsigned TaggedReg = I->getOperand(0).getReg();
+ int FI = I->getOperand(1).getIndex();
+ uncheckUsesOf(TaggedReg, FI);
+ }
+}
+
+bool AArch64StackTaggingPreRA::runOnMachineFunction(MachineFunction &Func) {
+ MF = &Func;
+ MRI = &MF->getRegInfo();
+ AFI = MF->getInfo<AArch64FunctionInfo>();
+  TII = static_cast<const AArch64InstrInfo *>(
+      MF->getSubtarget().getInstrInfo());
+ TRI = static_cast<const AArch64RegisterInfo *>(
+ MF->getSubtarget().getRegisterInfo());
+ MFI = &MF->getFrameInfo();
+ ReTags.clear();
+
+ assert(MRI->isSSA());
+
+ LLVM_DEBUG(dbgs() << "********** AArch64 Stack Tagging PreRA **********\n"
+ << "********** Function: " << MF->getName() << '\n');
+
+ SmallSetVector<int, 8> TaggedSlots;
+ for (auto &BB : *MF) {
+ for (auto &I : BB) {
+ if (I.getOpcode() == AArch64::TAGPstack) {
+ ReTags.push_back(&I);
+ int FI = I.getOperand(1).getIndex();
+ TaggedSlots.insert(FI);
+ // There should be no offsets in TAGP yet.
+ assert(I.getOperand(2).getImm() == 0);
+ }
+ }
+ }
+
+ if (ReTags.empty())
+ return false;
+
+ if (mayUseUncheckedLoadStore())
+ uncheckLoadsAndStores();
+
+ return true;
+}
initializeLDTLSCleanupPass(*PR);
initializeAArch64SpeculationHardeningPass(*PR);
initializeAArch64StackTaggingPass(*PR);
+ initializeAArch64StackTaggingPreRAPass(*PR);
}
//===----------------------------------------------------------------------===//
if (EnableStPairSuppress)
addPass(createAArch64StorePairSuppressPass());
addPass(createAArch64SIMDInstrOptPass());
+ if (TM->getOptLevel() != CodeGenOpt::None)
+ addPass(createAArch64StackTaggingPreRAPass());
return true;
}
AArch64SelectionDAGInfo.cpp
AArch64SpeculationHardening.cpp
AArch64StackTagging.cpp
+ AArch64StackTaggingPreRA.cpp
AArch64StorePairSuppress.cpp
AArch64Subtarget.cpp
AArch64TargetMachine.cpp
/// MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag
/// in bits 56-63.
+  /// On a FrameIndex operand, indicates that the underlying memory is tagged
+  /// with an unknown tag value (MTE); this needs to be lowered either to an
+  /// SP-relative load or store instruction (which does not check tags), or to
+  /// an LDG instruction to obtain the tag value.
MO_TAGGED = 0x400,
};
} // end namespace AArch64II
; CHECK-NEXT: Early If-Conversion
; CHECK-NEXT: AArch64 Store Pair Suppression
; CHECK-NEXT: AArch64 SIMD instructions optimization pass
+; CHECK-NEXT: AArch64 Stack Tagging PreRA
; CHECK-NEXT: MachineDominator Tree Construction
; CHECK-NEXT: Machine Natural Loop Construction
; CHECK-NEXT: Early Machine Loop Invariant Code Motion
--- /dev/null
+; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s --check-prefixes=DEFAULT,COMMON
+; RUN: llc < %s -mtriple=aarch64 -mattr=+mte -stack-tagging-unchecked-ld-st=never | FileCheck %s --check-prefixes=NEVER,COMMON
+; RUN: llc < %s -mtriple=aarch64 -mattr=+mte -stack-tagging-unchecked-ld-st=always | FileCheck %s --check-prefixes=ALWAYS,COMMON
+
+declare void @use8(i8*)
+declare void @use32(i32*)
+declare void @use2x64([2 x i64]*)
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
+
+define i32 @CallLd() sanitize_memtag {
+entry:
+ %x = alloca i32, align 4
+ call void @use32(i32* %x)
+ %a = load i32, i32* %x
+ ret i32 %a
+}
+
+; COMMON: CallLd:
+; COMMON: bl use32
+
+; ALWAYS: ldr w0, [sp]
+; DEFAULT: ldr w0, [sp]
+; NEVER: ldr w0, [x{{.*}}]
+
+; COMMON: ret
+
+define void @CallStCall() sanitize_memtag {
+entry:
+ %x = alloca i32, align 4
+ call void @use32(i32* %x)
+ store i32 42, i32* %x
+ call void @use32(i32* %x)
+ ret void
+}
+
+; COMMON: CallStCall:
+; COMMON: bl use32
+
+; ALWAYS: str w{{.*}}, [sp]
+; DEFAULT: str w{{.*}}, [sp]
+; NEVER: str w{{.*}}, [x{{.*}}]
+
+; COMMON: bl use32
+; COMMON: ret
+
+define void @CallStPair(i64 %z) sanitize_memtag {
+entry:
+ %x = alloca [2 x i64], align 8
+ call void @use2x64([2 x i64]* %x)
+ %x0 = getelementptr inbounds [2 x i64], [2 x i64]* %x, i64 0, i64 0
+ store i64 %z, i64* %x0, align 8
+ %x1 = getelementptr inbounds [2 x i64], [2 x i64]* %x, i64 0, i64 1
+ store i64 %z, i64* %x1, align 8
+ call void @use2x64([2 x i64]* %x)
+ ret void
+}
+
+; COMMON: CallStPair:
+; COMMON: bl use2x64
+
+; ALWAYS: stp {{.*}}, [sp]
+; DEFAULT: stp {{.*}}, [sp]
+; NEVER: stp {{.*}}, [x{{.*}}]
+
+; COMMON: bl use2x64
+; COMMON: ret
+
+; One of the two allocas will end up out of range of ldrb [sp].
+define dso_local i8 @LargeFrame() sanitize_memtag {
+entry:
+ %x = alloca [4096 x i8], align 4
+ %y = alloca [4096 x i8], align 4
+ %0 = getelementptr inbounds [4096 x i8], [4096 x i8]* %x, i64 0, i64 0
+ %1 = getelementptr inbounds [4096 x i8], [4096 x i8]* %y, i64 0, i64 0
+ call void @use8(i8* %0)
+ call void @use8(i8* %1)
+ %2 = load i8, i8* %0, align 4
+ %3 = load i8, i8* %1, align 4
+ %add = add i8 %3, %2
+ ret i8 %add
+}
+
+; COMMON: LargeFrame:
+; COMMON: bl use8
+; COMMON: bl use8
+
+; NEVER: ldrb [[A:w.*]], [x{{.*}}]
+; NEVER: ldrb [[B:w.*]], [x{{.*}}]
+
+; DEFAULT: ldrb [[A:w.*]], [x{{.*}}]
+; DEFAULT: ldrb [[B:w.*]], [x{{.*}}]
+
+; ALWAYS: ldg [[PA:x.*]], [x{{.*}}]
+; ALWAYS: ldrb [[B:w.*]], [sp]
+; ALWAYS: ldrb [[A:w.*]], {{\[}}[[PA]]{{\]}}
+
+; COMMON: add w0, [[B]], [[A]]
+; COMMON: ret
+
+; One of these allocas is closer to FP than to SP, and within 256 bytes
+; of the former (see hardcoded limit in resolveFrameOffsetReference).
+; It could be lowered to an FP-relative load, but not when doing an
+; unchecked access to tagged memory!
+define i8 @FPOffset() "frame-pointer"="all" sanitize_memtag {
+ %x = alloca [200 x i8], align 4
+ %y = alloca [200 x i8], align 4
+ %z = alloca [200 x i8], align 4
+ %x0 = getelementptr inbounds [200 x i8], [200 x i8]* %x, i64 0, i64 0
+ %y0 = getelementptr inbounds [200 x i8], [200 x i8]* %y, i64 0, i64 0
+ %z0 = getelementptr inbounds [200 x i8], [200 x i8]* %z, i64 0, i64 0
+ call void @use8(i8* %x0)
+ call void @use8(i8* %y0)
+ call void @use8(i8* %z0)
+ %x1 = load i8, i8* %x0, align 4
+ %y1 = load i8, i8* %y0, align 4
+ %z1 = load i8, i8* %z0, align 4
+ %a = add i8 %x1, %y1
+ %b = add i8 %a, %z1
+ ret i8 %b
+}
+
+; COMMON: FPOffset:
+; COMMON: bl use8
+; COMMON: bl use8
+; COMMON: bl use8
+
+; All three loads are SP-based.
+; ALWAYS-DAG: ldrb w{{.*}}, [sp, #416]
+; ALWAYS-DAG: ldrb w{{.*}}, [sp, #208]
+; ALWAYS-DAG: ldrb w{{.*}}, [sp]
+
+; DEFAULT-DAG: ldrb w{{.*}}, [sp, #416]
+; DEFAULT-DAG: ldrb w{{.*}}, [sp, #208]
+; DEFAULT-DAG: ldrb w{{.*}}, [sp]
+
+; NEVER-DAG: ldrb w{{.*}}, [x{{.*}}]
+; NEVER-DAG: ldrb w{{.*}}, [x{{.*}}]
+; NEVER-DAG: ldrb w{{.*}}, [x{{.*}}]
+
+; COMMON: ret