From: Evgeniy Stepanov
Date: Wed, 17 Jul 2019 19:24:02 +0000 (+0000)
Subject: Basic codegen for MTE stack tagging.
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=4fca50d47d33e5d7b252bdf7784566c58e4b62f5;p=llvm

Basic codegen for MTE stack tagging.

Implement IR intrinsics for stack tagging. Generated code is largely
unoptimized for now.

Two special intrinsics, llvm.aarch64.irg.sp and llvm.aarch64.tagp, are used
to implement a tagged stack frame pointer in a virtual register.

Differential Revision: https://reviews.llvm.org/D64172

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@366360 91177308-0d34-0410-b5e6-96231b3b80d8
---

diff --git a/include/llvm/CodeGen/SelectionDAGTargetInfo.h b/include/llvm/CodeGen/SelectionDAGTargetInfo.h
index 7c9f57beb66..6f6a9a5ae26 100644
--- a/include/llvm/CodeGen/SelectionDAGTargetInfo.h
+++ b/include/llvm/CodeGen/SelectionDAGTargetInfo.h
@@ -147,6 +147,14 @@ public:
     return std::make_pair(SDValue(), SDValue());
   }

+  virtual SDValue EmitTargetCodeForSetTag(SelectionDAG &DAG, const SDLoc &dl,
+                                          SDValue Chain, SDValue Addr,
+                                          SDValue Size,
+                                          MachinePointerInfo DstPtrInfo,
+                                          bool ZeroData) const {
+    return SDValue();
+  }
+
   // Return true when the decision to generate FMA's (or FMS, FMLA etc) rather
   // than FMUL and ADD is delegated to the machine combiner.
   virtual bool generateFMAsInMachineCombiner(CodeGenOpt::Level OptLevel) const {
diff --git a/include/llvm/IR/IntrinsicsAArch64.td b/include/llvm/IR/IntrinsicsAArch64.td
index 7616d6a90c1..832aca4fd30 100644
--- a/include/llvm/IR/IntrinsicsAArch64.td
+++ b/include/llvm/IR/IntrinsicsAArch64.td
@@ -702,4 +702,34 @@ def int_aarch64_stg : Intrinsic<[], [llvm_ptr_ty, llvm_ptr_ty],
     [IntrWriteMem]>;
 def int_aarch64_subp : Intrinsic<[llvm_i64_ty], [llvm_ptr_ty, llvm_ptr_ty],
     [IntrNoMem]>;
+
+// The following are codegen-only intrinsics for stack instrumentation.
+
+// Generate a randomly tagged stack base pointer.
+def int_aarch64_irg_sp : Intrinsic<[llvm_ptr_ty], [llvm_i64_ty],
+    [IntrInaccessibleMemOnly]>;
+
+// Transfer pointer tag with offset.
+// ptr1 = tagp(ptr0, baseptr, tag_offset) returns a pointer where
+// * address is the address in ptr0
+// * tag is a function of (tag in baseptr, tag_offset).
+// Address bits in baseptr and tag bits in ptr0 are ignored.
+// When the offset between ptr0 and baseptr is a compile-time constant, this can be emitted as
+// ADDG ptr1, baseptr, (ptr0 - baseptr), tag_offset
+// It is intended that ptr0 is an alloca address, and baseptr is the direct output of llvm.aarch64.irg.sp.
+def int_aarch64_tagp : Intrinsic<[llvm_anyptr_ty], [LLVMMatchType<0>, llvm_ptr_ty, llvm_i64_ty],
+    [IntrNoMem, ImmArg<2>]>;
+
+// Update allocation tags for the memory range to match the tag in the pointer argument.
+def int_aarch64_settag : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty],
+    [IntrWriteMem, IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;
+
+// Update allocation tags for the memory range to match the tag in the pointer argument,
+// and set memory contents to zero.
+def int_aarch64_settag_zero : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty],
+    [IntrWriteMem, IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;
+
+// Update allocation tags for a 16-byte-aligned, 16-byte-sized memory region,
+// and store a pair of 8-byte values.
+def int_aarch64_stgp : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i64_ty],
+    [IntrWriteMem, IntrArgMemOnly, NoCapture<0>, WriteOnly<0>]>;
 }
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 49a328bbc9b..c70906dcc62 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -3666,7 +3666,8 @@ bool llvm::isIntrinsicReturningPointerAliasingArgumentWithoutCapturing(
     const CallBase *Call) {
   return Call->getIntrinsicID() == Intrinsic::launder_invariant_group ||
          Call->getIntrinsicID() == Intrinsic::strip_invariant_group ||
-         Call->getIntrinsicID() == Intrinsic::aarch64_irg;
+         Call->getIntrinsicID() == Intrinsic::aarch64_irg ||
+         Call->getIntrinsicID() == Intrinsic::aarch64_tagp;
 }

 /// \p PN defines a loop-variant pointer to an object. Check if the
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 61ec29261c3..e818dd27c05 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6805,6 +6805,19 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
     // MachineFunction in SelectionDAGISel::PrepareEHLandingPad. We can safely
     // delete it now.
     return;
+
+  case Intrinsic::aarch64_settag:
+  case Intrinsic::aarch64_settag_zero: {
+    const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+    bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero;
+    SDValue Val = TSI.EmitTargetCodeForSetTag(
+        DAG, getCurSDLoc(), getRoot(), getValue(I.getArgOperand(0)),
+        getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)),
+        ZeroMemory);
+    DAG.setRoot(Val);
+    setValue(&I, Val);
+    return;
+  }
   }
 }
diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index 68076d2ca92..210c10eb184 100644
--- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -15,6 +15,7 @@
 #include "AArch64ExpandImm.h"
 #include "AArch64InstrInfo.h"
+#include "AArch64MachineFunctionInfo.h"
 #include "AArch64Subtarget.h"
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "Utils/AArch64BaseInfo.h"
@@ -74,6 +75,9 @@ private:
   bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI,
                           MachineBasicBlock::iterator &NextMBBI);
+  bool expandSetTagLoop(MachineBasicBlock &MBB,
+                        MachineBasicBlock::iterator MBBI,
+                        MachineBasicBlock::iterator &NextMBBI);
 };

 } // end anonymous namespace
@@ -336,6 +340,64 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
   return true;
 }

+bool AArch64ExpandPseudo::expandSetTagLoop(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock::iterator &NextMBBI) {
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+  Register SizeReg = MI.getOperand(2).getReg();
+  Register AddressReg = MI.getOperand(3).getReg();
+
+  MachineFunction *MF = MBB.getParent();
+
+  bool ZeroData = MI.getOpcode() == AArch64::STZGloop;
+  const unsigned OpCode =
+      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
+
+  auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+  MF->insert(++MBB.getIterator(), LoopBB);
+  MF->insert(++LoopBB->getIterator(), DoneBB);
+
+  BuildMI(LoopBB, DL, TII->get(OpCode))
+      .addDef(AddressReg)
+      .addReg(AddressReg)
+      .addReg(AddressReg)
+      .addImm(2)
+      .cloneMemRefs(MI)
+      .setMIFlags(MI.getFlags());
+  BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri))
+      .addDef(SizeReg)
+      .addReg(SizeReg)
+      .addImm(16 * 2)
+      .addImm(0);
+  BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB);
+
+  LoopBB->addSuccessor(LoopBB);
+  LoopBB->addSuccessor(DoneBB);
+
+  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
+  DoneBB->transferSuccessors(&MBB);
+
+  MBB.addSuccessor(LoopBB);
+
+  NextMBBI = MBB.end();
+  MI.eraseFromParent();
+  // Recompute liveness bottom up.
+  LivePhysRegs LiveRegs;
+  computeAndAddLiveIns(LiveRegs, *DoneBB);
+  computeAndAddLiveIns(LiveRegs, *LoopBB);
+  // Do an extra pass in the loop to get the loop carried dependencies right.
+  // FIXME: is this necessary?
+  LoopBB->clearLiveIns();
+  computeAndAddLiveIns(LiveRegs, *LoopBB);
+  DoneBB->clearLiveIns();
+  computeAndAddLiveIns(LiveRegs, *DoneBB);
+
+  return true;
+}
+
 /// If MBBI references a pseudo instruction that should be expanded here,
 /// do the expansion and return true. Otherwise return false.
 bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
@@ -569,6 +631,46 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
     MI.eraseFromParent();
     return true;
   }
+  case AArch64::IRGstack: {
+    MachineFunction &MF = *MBB.getParent();
+    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+    const AArch64FrameLowering *TFI =
+        MF.getSubtarget<AArch64Subtarget>().getFrameLowering();
+
+    // IRG does not allow immediate offset. getTaggedBasePointerOffset should
+    // almost always point to SP-after-prologue; if not, emit a longer
+    // instruction sequence.
+    int BaseOffset = -AFI->getTaggedBasePointerOffset();
+    unsigned FrameReg;
+    int FrameRegOffset = TFI->resolveFrameOffsetReference(
+        MF, BaseOffset, false /*isFixed*/, FrameReg, /*PreferFP=*/false,
+        /*ForSimm=*/true);
+    Register SrcReg = FrameReg;
+    if (FrameRegOffset != 0) {
+      // Use output register as temporary.
+      SrcReg = MI.getOperand(0).getReg();
+      emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
+                      FrameRegOffset, TII);
+    }
+    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
+        .add(MI.getOperand(0))
+        .addUse(SrcReg)
+        .add(MI.getOperand(2));
+    MI.eraseFromParent();
+    return true;
+  }
+  case AArch64::TAGPstack: {
+    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDG))
+        .add(MI.getOperand(0))
+        .add(MI.getOperand(1))
+        .add(MI.getOperand(2))
+        .add(MI.getOperand(4));
+    MI.eraseFromParent();
+    return true;
+  }
+  case AArch64::STGloop:
+  case AArch64::STZGloop:
+    return expandSetTagLoop(MBB, MBBI, NextMBBI);
   }
   return false;
 }
diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp
index fed0fc7f624..8c6e5cbd5c1 100644
--- a/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -842,6 +842,10 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
   if (MF.getFunction().getCallingConv() == CallingConv::GHC)
     return;

+  // Set tagged base pointer to the bottom of the stack frame.
+  // Ideally it should match SP value after prologue.
+  AFI->setTaggedBasePointerOffset(MFI.getStackSize());
+
   // getStackSize() includes all the locals in its size calculation. We don't
   // include these locals when computing the stack size of a funclet, as they
   // are allocated in the parent's stack frame and accessed via the frame
diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 2a911c43908..cd7e927ac80 100644
--- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -157,6 +157,9 @@ public:
   bool tryIndexedLoad(SDNode *N);

+  bool trySelectStackSlotTagP(SDNode *N);
+  void SelectTagP(SDNode *N);
+
   void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                   unsigned SubRegIdx);
   void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
@@ -703,7 +706,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSigned
     return true;
   }

-  // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed
+  // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
   // selected here doesn't support labels/immediates, only base+offset.
   if (CurDAG->isBaseWithConstantOffset(N)) {
     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
@@ -2790,6 +2793,58 @@ bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
   return true;
 }

+bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
+  // tagp(FrameIndex, IRGstack, tag_offset):
+  // since the offset between FrameIndex and IRGstack is a compile-time
+  // constant, this can be lowered to a single ADDG instruction.
+  if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
+    return false;
+  }
+
+  SDValue IRG_SP = N->getOperand(2);
+  if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
+      cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() !=
+          Intrinsic::aarch64_irg_sp) {
+    return false;
+  }
+
+  const TargetLowering *TLI = getTargetLowering();
+  SDLoc DL(N);
+  int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
+  SDValue FiOp = CurDAG->getTargetFrameIndex(
+      FI, TLI->getPointerTy(CurDAG->getDataLayout()));
+  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
+
+  SDNode *Out = CurDAG->getMachineNode(
+      AArch64::TAGPstack, DL, MVT::i64,
+      {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
+       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
+  ReplaceNode(N, Out);
+  return true;
+}
+
+void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
+  assert(isa<ConstantSDNode>(N->getOperand(3)) &&
+         "llvm.aarch64.tagp third argument must be an immediate");
+  if (trySelectStackSlotTagP(N))
+    return;
+  // FIXME: above applies in any case when offset between Op1 and Op2 is a
+  // compile-time constant, not just for stack allocations.
+
+  // General case for unrelated pointers in Op1 and Op2.
+  SDLoc DL(N);
+  int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
+  SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
+                                      {N->getOperand(1), N->getOperand(2)});
+  SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
+                                      {SDValue(N1, 0), N->getOperand(2)});
+  SDNode *N3 = CurDAG->getMachineNode(
+      AArch64::ADDG, DL, MVT::i64,
+      {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
+       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
+  ReplaceNode(N, N3);
+}
+
 void AArch64DAGToDAGISel::Select(SDNode *Node) {
   // If we have a custom node, we already have selected!
   if (Node->isMachineOpcode()) {
@@ -3283,6 +3338,9 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
     switch (IntNo) {
     default:
       break;
+    case Intrinsic::aarch64_tagp:
+      SelectTagP(Node);
+      return;
     case Intrinsic::aarch64_neon_tbl2:
       SelectTable(Node, 2,
                   VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp
index 11ee1a5f9e6..7becc99fb5c 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1234,6 +1234,10 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS";
   case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
   case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS";
+  case AArch64ISD::STG: return "AArch64ISD::STG";
+  case AArch64ISD::STZG: return "AArch64ISD::STZG";
+  case AArch64ISD::ST2G: return "AArch64ISD::ST2G";
+  case AArch64ISD::STZ2G: return "AArch64ISD::STZ2G";
   }
   return nullptr;
 }
diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h
index 754caaf5192..4421c31f65c 100644
--- a/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/lib/Target/AArch64/AArch64ISelLowering.h
@@ -214,7 +214,13 @@ enum NodeType : unsigned {
   LD4LANEpost,
   ST2LANEpost,
   ST3LANEpost,
-  ST4LANEpost
+  ST4LANEpost,
+
+  STG,
+  STZG,
+  ST2G,
+  STZ2G
+
 };

 } // end namespace AArch64ISD
diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td
index 74fa5ef713d..d619137b55c 100644
--- a/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/lib/Target/AArch64/AArch64InstrFormats.td
@@ -4067,12 +4067,12 @@ multiclass MemTagStore<bits<2> opc1, string insn> {
                 (outs), (ins GPR64sp:$Rt, GPR64sp:$Rn, simm9s16:$offset)>;
   def PreIndex : BaseMemTagStore;
   def PostIndex : BaseMemTagStore;
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 599a5abd611..215e96a82d0 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -1772,6 +1772,7 @@ unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
   case AArch64::STNPWi:
   case AArch64::STNPSi:
   case AArch64::LDG:
+  case AArch64::STGPi:
     return 3;
   case AArch64::ADDG:
   case AArch64::STGOffset:
@@ -2151,6 +2152,7 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
     MaxOffset = 4095;
     break;
   case AArch64::ADDG:
+  case AArch64::TAGPstack:
    Scale = 16;
    Width = 0;
    MinOffset = 0;
@@ -2158,10 +2160,23 @@
   case AArch64::LDG:
   case AArch64::STGOffset:
+  case AArch64::STZGOffset:
    Scale = Width = 16;
    MinOffset = -256;
    MaxOffset = 255;
    break;
+  case AArch64::ST2GOffset:
+  case AArch64::STZ2GOffset:
+    Scale = 16;
+    Width = 32;
+    MinOffset = -256;
+    MaxOffset = 255;
+    break;
+  case AArch64::STGPi:
+    Scale = Width = 16;
+    MinOffset = -64;
+    MaxOffset = 63;
+    break;
   }

   return true;
@@ -3257,6 +3272,8 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
   case AArch64::ST1Twov1d:
   case AArch64::ST1Threev1d:
   case AArch64::ST1Fourv1d:
+  case AArch64::IRG:
+  case AArch64::IRGstack:
     return AArch64FrameOffsetCannotUpdate;
   }

diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td
index 897b3ebb384..eed53f36d57 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/lib/Target/AArch64/AArch64InstrInfo.td
@@ -409,6 +409,12 @@
SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>; def AArch64smaxv : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>; def AArch64umaxv : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>; +def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>; +def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def AArch64st2g : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; + //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -1289,6 +1295,15 @@ defm STZG : MemTagStore<0b01, "stzg">; defm ST2G : MemTagStore<0b10, "st2g">; defm STZ2G : MemTagStore<0b11, "stz2g">; +def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), + (STGOffset $Rn, $Rm, $imm)>; +def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), + (STZGOffset $Rn, $Rm, $imm)>; +def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), + (ST2GOffset $Rn, $Rm, $imm)>; +def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)), + (STZ2GOffset $Rn, $Rm, $imm)>; + defm STGP : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">; def STGPpre : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">; def STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">; @@ -1296,6 +1311,36 @@ def STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">; def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)), (STGOffset GPR64:$Rt, GPR64sp:$Rn, simm9s16:$offset)>; +def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm), GPR64:$Rt, GPR64:$Rt2), + (STGPi $Rt, $Rt2, $Rn, $imm)>; + +def IRGstack + : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>, + Sched<[]>; +def TAGPstack + : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4), []>, + Sched<[]>; + +// Explicit SP in the first operand prevents ShrinkWrap optimization +// from leaving this instruction out of the stack frame. When IRGstack +// is transformed into IRG, this operand is replaced with the actual +// register / expression for the tagged base pointer of the current function. +def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>; + +// Large STG to be expanded into a loop. $Rm is the size, $Rn is start address. +// $Rn_wback is one past the end of the range. 
+let isCodeGenOnly=1, mayStore=1 in {
+def STGloop
+    : Pseudo<(outs GPR64common:$Rm_wback, GPR64sp:$Rn_wback), (ins GPR64common:$Rm, GPR64sp:$Rn),
+             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,$Rm = $Rm_wback,@earlyclobber $Rm_wback" >,
+      Sched<[WriteAdr, WriteST]>;
+
+def STZGloop
+    : Pseudo<(outs GPR64common:$Rm_wback, GPR64sp:$Rn_wback), (ins GPR64common:$Rm, GPR64sp:$Rn),
+             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,$Rm = $Rm_wback,@earlyclobber $Rm_wback" >,
+      Sched<[WriteAdr, WriteST]>;
+}
+
 } // Predicates = [HasMTE]

 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index f4e810fa454..0efeeb272ec 100644
--- a/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -105,6 +105,12 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
   /// ForwardedMustTailRegParms - A list of virtual and physical registers
   /// that must be forwarded to every musttail call.
   SmallVector<ForwardedRegister, 1> ForwardedMustTailRegParms;
+
+  // Offset from SP-at-entry to the tagged base pointer.
+  // Tagged base pointer is set up to point to the first (lowest address) tagged
+  // stack slot.
+  unsigned TaggedBasePointerOffset;
+
 public:
   AArch64FunctionInfo() = default;

@@ -224,6 +230,13 @@ public:
     return ForwardedMustTailRegParms;
   }

+  unsigned getTaggedBasePointerOffset() const {
+    return TaggedBasePointerOffset;
+  }
+  void setTaggedBasePointerOffset(unsigned Offset) {
+    TaggedBasePointerOffset = Offset;
+  }
+
 private:
   // Hold the lists of LOHs.
   MILOHContainer LOHContainerSet;
diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp
index c44d77c7263..6d5a4e3d2f7 100644
--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -468,10 +468,19 @@ void AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     return;
   }

-  // Modify MI as necessary to handle as much of 'Offset' as possible
-  Offset = TFI->resolveFrameIndexReference(
-      MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
+  if (MI.getOpcode() == AArch64::TAGPstack) {
+    // TAGPstack must use the virtual frame register in its 3rd operand.
+    const MachineFrameInfo &MFI = MF.getFrameInfo();
+    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
+    FrameReg = MI.getOperand(3).getReg();
+    Offset =
+        MFI.getObjectOffset(FrameIndex) + AFI->getTaggedBasePointerOffset();
+  } else {
+    Offset = TFI->resolveFrameIndexReference(
+        MF, FrameIndex, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
+  }
+
+  // Modify MI as necessary to handle as much of 'Offset' as possible
   if (rewriteAArch64FrameIndex(MI, FIOperandNum, FrameReg, Offset, TII))
     return;
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
index 953d7387f2e..60dbace03ca 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp
@@ -56,3 +56,91 @@ bool AArch64SelectionDAGInfo::generateFMAsInMachineCombiner(
     CodeGenOpt::Level OptLevel) const {
   return OptLevel >= CodeGenOpt::Aggressive;
 }
+
+static const int kSetTagLoopThreshold = 176;
+
+static SDValue EmitUnrolledSetTag(SelectionDAG &DAG, const SDLoc &dl,
+                                  SDValue Chain, SDValue Ptr, uint64_t ObjSize,
+                                  const MachineMemOperand *BaseMemOperand,
+                                  bool ZeroData) {
+  MachineFunction &MF = DAG.getMachineFunction();
+  unsigned ObjSizeScaled = ObjSize / 16;
+
+  SDValue TagSrc = Ptr;
+  if (Ptr.getOpcode() == ISD::FrameIndex) {
+    int FI = cast<FrameIndexSDNode>(Ptr)->getIndex();
+    Ptr = DAG.getTargetFrameIndex(FI, MVT::i64);
+    // A frame index operand may end up as [SP + offset] => it is fine to use SP
+    // register as the tag source.
+    TagSrc = DAG.getRegister(AArch64::SP, MVT::i64);
+  }
+
+  const unsigned OpCode1 = ZeroData ? AArch64ISD::STZG : AArch64ISD::STG;
+  const unsigned OpCode2 = ZeroData ? AArch64ISD::STZ2G : AArch64ISD::ST2G;
+
+  SmallVector<SDValue, 8> OutChains;
+  unsigned OffsetScaled = 0;
+  while (OffsetScaled < ObjSizeScaled) {
+    if (ObjSizeScaled - OffsetScaled >= 2) {
+      SDValue AddrNode = DAG.getMemBasePlusOffset(Ptr, OffsetScaled * 16, dl);
+      SDValue St = DAG.getMemIntrinsicNode(
+          OpCode2, dl, DAG.getVTList(MVT::Other),
+          {Chain, TagSrc, AddrNode},
+          MVT::v4i64,
+          MF.getMachineMemOperand(BaseMemOperand, OffsetScaled * 16, 16 * 2));
+      OffsetScaled += 2;
+      OutChains.push_back(St);
+      continue;
+    }
+
+    if (ObjSizeScaled - OffsetScaled > 0) {
+      SDValue AddrNode = DAG.getMemBasePlusOffset(Ptr, OffsetScaled * 16, dl);
+      SDValue St = DAG.getMemIntrinsicNode(
+          OpCode1, dl, DAG.getVTList(MVT::Other),
+          {Chain, TagSrc, AddrNode},
+          MVT::v2i64,
+          MF.getMachineMemOperand(BaseMemOperand, OffsetScaled * 16, 16));
+      OffsetScaled += 1;
+      OutChains.push_back(St);
+    }
+  }
+
+  SDValue Res = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
+  return Res;
+}
+
+SDValue AArch64SelectionDAGInfo::EmitTargetCodeForSetTag(
+    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Addr,
+    SDValue Size, MachinePointerInfo DstPtrInfo, bool ZeroData) const {
+  uint64_t ObjSize = cast<ConstantSDNode>(Size)->getZExtValue();
+  assert(ObjSize % 16 == 0);
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineMemOperand *BaseMemOperand = MF.getMachineMemOperand(
+      DstPtrInfo, MachineMemOperand::MOStore, ObjSize, 16);
+
+  bool UseSetTagRangeLoop =
+      kSetTagLoopThreshold >= 0 && (int)ObjSize >= kSetTagLoopThreshold;
+  if (!UseSetTagRangeLoop)
+    return EmitUnrolledSetTag(DAG, dl, Chain, Addr, ObjSize, BaseMemOperand,
+                              ZeroData);
+
+  if (ObjSize % 32 != 0) {
+    SDNode *St1 = DAG.getMachineNode(
+        ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex, dl,
+        {MVT::i64, MVT::Other},
+        {Addr, Addr, DAG.getTargetConstant(1, dl, MVT::i64), Chain});
+    DAG.setNodeMemRefs(cast<MachineSDNode>(St1), {BaseMemOperand});
+    ObjSize -= 16;
+    Addr = SDValue(St1, 0);
+    Chain = SDValue(St1, 1);
+  }
+
+  const EVT ResTys[] = {MVT::i64, MVT::i64, MVT::Other};
+  SDValue Ops[] = {DAG.getConstant(ObjSize, dl, MVT::i64), Addr, Chain};
+  SDNode *St = DAG.getMachineNode(
+      ZeroData ? AArch64::STZGloop : AArch64::STGloop, dl, ResTys, Ops);
+
+  DAG.setNodeMemRefs(cast<MachineSDNode>(St), {BaseMemOperand});
+  return SDValue(St, 2);
+}
diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
index 9d386128fdc..d0967fb973c 100644
--- a/lib/Target/AArch64/AArch64SelectionDAGInfo.h
+++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h
@@ -23,6 +23,10 @@ public:
                                   SDValue Chain, SDValue Dst, SDValue Src,
                                   SDValue Size, unsigned Align, bool isVolatile,
                                   MachinePointerInfo DstPtrInfo) const override;
+  SDValue EmitTargetCodeForSetTag(SelectionDAG &DAG, const SDLoc &dl,
+                                  SDValue Chain, SDValue Op1, SDValue Op2,
+                                  MachinePointerInfo DstPtrInfo,
+                                  bool ZeroData) const override;
   bool generateFMAsInMachineCombiner(CodeGenOpt::Level OptLevel) const override;
 };
 }
diff --git a/test/Analysis/ValueTracking/aarch64.irg.ll b/test/Analysis/ValueTracking/aarch64.irg.ll
index 75e198abe85..3d8b0e22d78 100644
--- a/test/Analysis/ValueTracking/aarch64.irg.ll
+++ b/test/Analysis/ValueTracking/aarch64.irg.ll
@@ -13,6 +13,22 @@ entry:
   ret void
 }

+; CHECK-LABEL: define void @checkNonnullTagp(
+define void @checkNonnullTagp(i8* %tag) {
+; CHECK: %[[p:.*]] = call i8* @llvm.aarch64.tagp.p0i8(i8* nonnull %a, i8* %tag, i64 1)
+; CHECK: %[[p2:.*]] = call i8* @llvm.aarch64.tagp.p0i8(i8* nonnull %[[p]], i8* %tag, i64 2)
+; CHECK: call void @use(i8* nonnull %[[p2]])
+entry:
+  %a = alloca i8, align 8
+
+  %p = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %tag, i64 1)
+  %p2 = call i8* @llvm.aarch64.tagp.p0i8(i8* %p, i8* %tag, i64 2)
+  call void @use(i8* %p2)
+
+  ret void
+}
+
 declare i8* @llvm.aarch64.irg(i8*, i64)
+declare i8* @llvm.aarch64.tagp.p0i8(i8*, i8*, i64)

 declare void @use(i8*)
diff --git a/test/CodeGen/AArch64/irg.ll b/test/CodeGen/AArch64/irg.ll
new file mode 100644
index 00000000000..31a018e1836
--- /dev/null
+++ b/test/CodeGen/AArch64/irg.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s
+
+define i8* @irg_imm16(i8* %p) {
+entry:
+; CHECK-LABEL: irg_imm16:
+; CHECK: mov w[[R:[0-9]+]], #16
+; CHECK: irg x0, x0, x[[R]]
+; CHECK: ret
+  %q = call i8* @llvm.aarch64.irg(i8* %p, i64 16)
+  ret i8* %q
+}
+
+define i8* @irg_imm0(i8* %p) {
+entry:
+; CHECK-LABEL: irg_imm0:
+; CHECK: irg x0, x0{{$}}
+; CHECK: ret
+  %q = call i8* @llvm.aarch64.irg(i8* %p, i64 0)
+  ret i8* %q
+}
+
+define i8* @irg_reg(i8* %p, i64 %ex) {
+entry:
+; CHECK-LABEL: irg_reg:
+; CHECK: irg x0, x0, x1
+; CHECK: ret
+  %q = call i8* @llvm.aarch64.irg(i8* %p, i64 %ex)
+  ret i8* %q
+}
+
+; undef argument in irg is treated specially
+define i8* @irg_sp() {
+entry:
+; CHECK-LABEL: irg_sp:
+; CHECK: irg x0, sp{{$}}
+; CHECK: ret
+  %q = call i8* @llvm.aarch64.irg.sp(i64 0)
+  ret i8* %q
+}
+
+declare i8* @llvm.aarch64.irg(i8* %p, i64 %exclude)
+declare i8* @llvm.aarch64.irg.sp(i64 %exclude)
diff --git a/test/CodeGen/AArch64/irg_sp_tagp.ll b/test/CodeGen/AArch64/irg_sp_tagp.ll
new file mode 100644
index 00000000000..1232090fc6b
--- /dev/null
+++ b/test/CodeGen/AArch64/irg_sp_tagp.ll
@@ -0,0 +1,93 @@
+; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s
+
+define i8* @small_alloca() {
+entry:
+; CHECK-LABEL: small_alloca:
+; CHECK: irg [[R:x[0-9]+]], sp{{$}}
+; CHECK-NEXT: addg x0, [[R]], #0, #1
+; CHECK: ret
+  %a = alloca i8, align 16
+  %q = call i8* @llvm.aarch64.irg.sp(i64 0)
+  %q1 = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %q, i64 1)
+  ret i8* %q1
+}
+
+; Two large allocas. One's offset overflows the ADDG immediate.
+define void @huge_allocas() {
+entry:
+; CHECK-LABEL: huge_allocas:
+; CHECK: irg [[R:x[0-9]+]], sp{{$}}
+; CHECK: add [[TMP:x[0-9]+]], [[R]], #3088
+; CHECK: addg x0, [[TMP]], #1008, #1
+; CHECK: addg x1, [[R]], #0, #2
+; CHECK: bl use2
+  %a = alloca i8, i64 4096, align 16
+  %b = alloca i8, i64 4096, align 16
+  %base = call i8* @llvm.aarch64.irg.sp(i64 0)
+  %a_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %base, i64 1)
+  %b_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %b, i8* %base, i64 2)
+  call void @use2(i8* %a_t, i8* %b_t)
+  ret void
+}
+
+; Realigned stack frame. IRG uses the value of SP after realignment,
+; and ADDG for the first stack allocation has offset 0.
+define void @realign() {
+entry:
+; CHECK-LABEL: realign:
+; CHECK: add x29, sp, #16
+; CHECK: and sp, x{{[0-9]*}}, #0xffffffffffffffc0
+; CHECK: irg [[R:x[0-9]+]], sp{{$}}
+; CHECK: addg x0, [[R]], #0, #1
+; CHECK: bl use
+  %a = alloca i8, i64 4096, align 64
+  %base = call i8* @llvm.aarch64.irg.sp(i64 0)
+  %a_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %base, i64 1)
+  call void @use(i8* %a_t)
+  ret void
+}
+
+; With a dynamic alloca, IRG has to use FP with a non-zero offset.
+; The ADDG offset for the single static alloca is still zero.
+define void @dynamic_alloca(i64 %size) {
+entry:
+; CHECK-LABEL: dynamic_alloca:
+; CHECK: sub [[R:x[0-9]+]], x29, #[[OFS:[0-9]+]]
+; CHECK: irg [[R]], [[R]]
+; CHECK: addg x1, [[R]], #0, #1
+; CHECK: sub x0, x29, #[[OFS]]
+; CHECK: bl use2
+  %base = call i8* @llvm.aarch64.irg.sp(i64 0)
+  %a = alloca i128, i64 %size, align 16
+  %b = alloca i8, i64 16, align 16
+  %b_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %b, i8* %base, i64 1)
+  call void @use2(i8* %b, i8* %b_t)
+  ret void
+}
+
+; Both dynamic alloca and realigned frame.
+; After the initial realignment, generate the base pointer.
+; IRG uses the base pointer without an offset.
+; Offsets for tagged and untagged pointers to the same alloca match.
+define void @dynamic_alloca_and_realign(i64 %size) {
+entry:
+; CHECK-LABEL: dynamic_alloca_and_realign:
+; CHECK: and sp, x{{.*}}, #0xffffffffffffffc0
+; CHECK: mov x19, sp
+; CHECK: irg [[R:x[0-9]+]], x19
+; CHECK: addg x1, [[R]], #[[OFS:[0-9]+]], #1
+; CHECK: add x0, x19, #[[OFS]]
+; CHECK: bl use2
+  %base = call i8* @llvm.aarch64.irg.sp(i64 0)
+  %a = alloca i128, i64 %size, align 64
+  %b = alloca i8, i64 16, align 16
+  %b_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %b, i8* %base, i64 1)
+  call void @use2(i8* %b, i8* %b_t)
+  ret void
+}
+
+declare void @use(i8*)
+declare void @use2(i8*, i8*)
+
+declare i8* @llvm.aarch64.irg.sp(i64 %exclude)
+declare i8* @llvm.aarch64.tagp.p0i8(i8* %p, i8* %tag, i64 %ofs)
diff --git a/test/CodeGen/AArch64/settag.ll b/test/CodeGen/AArch64/settag.ll
new file mode 100644
index 00000000000..9ca188fbce3
--- /dev/null
+++ b/test/CodeGen/AArch64/settag.ll
@@ -0,0 +1,138 @@
+; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s
+
+define void @stg1(i8* %p) {
+entry:
+; CHECK-LABEL: stg1:
+; CHECK: stg x0, [x0]
+; CHECK: ret
+  call void @llvm.aarch64.settag(i8* %p, i64 16)
+  ret void
+}
+
+define void @stg2(i8* %p) {
+entry:
+; CHECK-LABEL: stg2:
+; CHECK: st2g x0, [x0]
+; CHECK: ret
+  call void @llvm.aarch64.settag(i8* %p, i64 32)
+  ret void
+}
+
+define void @stg3(i8* %p) {
+entry:
+; CHECK-LABEL: stg3:
+; CHECK: stg x0, [x0, #32]
+; CHECK: st2g x0, [x0]
+; CHECK: ret
+  call void @llvm.aarch64.settag(i8* %p, i64 48)
+  ret void
+}
+
+define void @stg4(i8* %p) {
+entry:
+; CHECK-LABEL: stg4:
+; CHECK: st2g x0, [x0, #32]
+; CHECK: st2g x0, [x0]
+; CHECK: ret
+  call void @llvm.aarch64.settag(i8* %p, i64 64)
+  ret void
+}
+
+define void @stg5(i8* %p) {
+entry:
+; CHECK-LABEL: stg5:
+; CHECK: stg x0, [x0, #64]
+; CHECK: st2g x0, [x0, #32]
+; CHECK: st2g x0, [x0]
+; CHECK: ret
+  call void @llvm.aarch64.settag(i8* %p, i64 80)
+  ret void
+}
+
+define void @stg16(i8* %p) {
+entry:
+; CHECK-LABEL: stg16:
+; CHECK: mov {{(w|x)}}[[R:[0-9]+]], #256
+; CHECK: st2g x0, [x0], #32
+; CHECK: sub x[[R]], x[[R]], #32
+; CHECK: cbnz x[[R]],
+; CHECK: ret
+  call void @llvm.aarch64.settag(i8* %p, i64 256)
+  ret void
+}
+
+define void @stg17(i8* %p) {
+entry:
+; CHECK-LABEL: stg17:
+; CHECK: mov {{(w|x)}}[[R:[0-9]+]], #256
+; CHECK: stg x0, [x0], #16
+; CHECK: st2g x0, [x0], #32
+; CHECK: sub x[[R]], x[[R]], #32
+; CHECK: cbnz x[[R]],
+; CHECK: ret
+  call void @llvm.aarch64.settag(i8* %p, i64 272)
+  ret void
+}
+
+define void @stzg3(i8* %p) {
+entry:
+; CHECK-LABEL: stzg3:
+; CHECK: stzg x0, [x0, #32]
+; CHECK: stz2g x0, [x0]
+; CHECK: ret
+  call void @llvm.aarch64.settag.zero(i8* %p, i64 48)
+  ret void
+}
+
+define void @stzg17(i8* %p) {
+entry:
+; CHECK-LABEL: stzg17:
+; CHECK: mov {{w|x}}[[R:[0-9]+]], #256
+; CHECK: stzg x0, [x0], #16
+; CHECK: stz2g x0, [x0], #32
+; CHECK: sub x[[R]], x[[R]], #32
+; CHECK: cbnz x[[R]],
+; CHECK: ret
+  call void @llvm.aarch64.settag.zero(i8* %p, i64 272)
+  ret void
+}
+
+define void @stg_alloca1() {
+entry:
+; CHECK-LABEL: stg_alloca1:
+; CHECK: stg sp, [sp]
+; CHECK: ret
+  %a = alloca i8, i32 16, align 16
+  call void @llvm.aarch64.settag(i8* %a, i64 16)
+  ret void
+}
+
+define void @stg_alloca5() {
+entry:
+; CHECK-LABEL: stg_alloca5:
+; CHECK: stg sp, [sp, #64]
+; CHECK: st2g sp, [sp, #32]
+; CHECK: st2g sp, [sp]
+; CHECK: ret
+  %a = alloca i8, i32 80, align 16
+  call void @llvm.aarch64.settag(i8* %a, i64 80)
+  ret void
+}
+
+define void @stg_alloca17() {
+entry:
+; CHECK-LABEL: stg_alloca17:
+; CHECK: mov [[P:x[0-9]+]], sp
+; CHECK: stg [[P]], {{\[}}[[P]]{{\]}}, #16
+; CHECK: mov {{w|x}}[[R:[0-9]+]], #256
+; CHECK: st2g [[P]], {{\[}}[[P]]{{\]}}, #32
+; CHECK: sub x[[R]], x[[R]], #32
+; CHECK: cbnz x[[R]],
+; CHECK: ret
+  %a = alloca i8, i32 272, align 16
+  call void @llvm.aarch64.settag(i8* %a, i64 272)
+  ret void
+}
+
+declare void @llvm.aarch64.settag(i8* %p, i64 %a)
+declare void @llvm.aarch64.settag.zero(i8* %p, i64 %a)
diff --git a/test/CodeGen/AArch64/stgp.ll b/test/CodeGen/AArch64/stgp.ll
new file mode 100644
index 00000000000..b4af16fd9ff
--- /dev/null
+++ b/test/CodeGen/AArch64/stgp.ll
@@ -0,0 +1,78 @@
+; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s
+
+define void @stgp0(i64 %a, i64 %b, i8* %p) {
+entry:
+; CHECK-LABEL: stgp0:
+; CHECK: stgp x0, x1, [x2]
+; CHECK: ret
+  call void @llvm.aarch64.stgp(i8* %p, i64 %a, i64 %b)
+  ret void
+}
+
+define void @stgp1004(i64 %a, i64 %b, i8* %p) {
+entry:
+; CHECK-LABEL: stgp1004:
+; CHECK: add [[R:x[0-9]+]], x2, #1004
+; CHECK: stgp x0, x1, {{\[}}[[R]]{{\]}}
+; CHECK: ret
+  %q = getelementptr i8, i8* %p, i32 1004
+  call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
+  ret void
+}
+
+define void @stgp1008(i64 %a, i64 %b, i8* %p) {
+entry:
+; CHECK-LABEL: stgp1008:
+; CHECK: stgp x0, x1, [x2, #1008]
+; CHECK: ret
+  %q = getelementptr i8, i8* %p, i32 1008
+  call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
+  ret void
+}
+
+define void @stgp1024(i64 %a, i64 %b, i8* %p) {
+entry:
+; CHECK-LABEL: stgp1024:
+; CHECK: add [[R:x[0-9]+]], x2, #1024
+; CHECK: stgp x0, x1, {{\[}}[[R]]{{\]}}
+; CHECK: ret
+  %q = getelementptr i8, i8* %p, i32 1024
+  call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
+  ret void
+}
+
+define void @stgp_1024(i64 %a, i64 %b, i8* %p) {
+entry:
+; CHECK-LABEL: stgp_1024:
+; CHECK: stgp x0, x1, [x2, #-1024]
+; CHECK: ret
+  %q = getelementptr i8, i8* %p, i32 -1024
+  call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
+  ret void
+}
+
+define void @stgp_1040(i64 %a, i64 %b, i8* %p) {
+entry:
+; CHECK-LABEL: stgp_1040:
+; CHECK: sub [[R:x[0-9]+]], x2, #1040
+; CHECK: stgp x0, x1, [x{{.*}}]
+; CHECK: ret
+  %q = getelementptr i8, i8* %p, i32 -1040
+  call void @llvm.aarch64.stgp(i8* %q, i64 %a, i64 %b)
+  ret void
+}
+
+define void @stgp_alloca(i64 %a, i64 %b) {
+entry:
+; CHECK-LABEL: stgp_alloca:
+; CHECK: stgp x0, x1, [sp]
+; CHECK: stgp x1, x0, [sp, #16]
+; CHECK: ret
+  %x = alloca i8, i32 32, align 16
+  call void @llvm.aarch64.stgp(i8* %x, i64 %a, i64 %b)
+  %x1 = getelementptr i8, i8* %x, i32 16
+  call void @llvm.aarch64.stgp(i8* %x1, i64 %b, i64 %a)
+  ret void
+}
+
+declare void @llvm.aarch64.stgp(i8* %p, i64 %a, i64 %b)
diff --git a/test/CodeGen/AArch64/tagp.ll b/test/CodeGen/AArch64/tagp.ll
new file mode 100644
index 00000000000..0af65389818
--- /dev/null
+++ b/test/CodeGen/AArch64/tagp.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -mtriple=aarch64 -mattr=+mte | FileCheck %s
+
+define i8* @tagp2(i8* %p, i8* %tag) {
+entry:
+; CHECK-LABEL: tagp2:
+; CHECK: subp [[R:x[0-9]+]], x0, x1
+; CHECK: add [[R]], [[R]], x1
+; CHECK: addg x0, [[R]], #0, #2
+; CHECK: ret
+  %q = call i8* @llvm.aarch64.tagp.p0i8(i8* %p, i8* %tag, i64 2)
+  ret i8* %q
+}
+
+define i8* @irg_tagp_unrelated(i8* %p, i8* %q) {
+entry:
+; CHECK-LABEL: irg_tagp_unrelated:
+; CHECK: irg [[R0:x[0-9]+]], x0{{$}}
+; CHECK: subp [[R:x[0-9]+]], [[R0]], x1
+; CHECK: add [[R]], [[R0]], x1
+; CHECK: addg x0, [[R]], #0, #1
+; CHECK: ret
+  %p1 = call i8* @llvm.aarch64.irg(i8* %p, i64 0)
+  %q1 = call i8* @llvm.aarch64.tagp.p0i8(i8* %p1, i8* %q, i64 1)
+  ret i8* %q1
+}
+
+define i8* @tagp_alloca(i8* %tag) {
+entry:
+; CHECK-LABEL: tagp_alloca:
+; CHECK: mov [[R0:x[0-9]+]], sp{{$}}
+; CHECK: subp [[R:x[0-9]+]], [[R0]], x0{{$}}
+; CHECK: add [[R]], [[R0]], x0{{$}}
+; CHECK: addg x0, [[R]], #0, #3
+; CHECK: ret
+  %a = alloca i8, align 16
+  %q = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %tag, i64 3)
+  ret i8* %q
+}
+
+declare i8* @llvm.aarch64.irg(i8* %p, i64 %exclude)
+declare i8* @llvm.aarch64.tagp.p0i8(i8* %p, i8* %tag, i64 %ofs)
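
For illustration, here is a minimal sketch of how an instrumentation pass is expected to compose these intrinsics into a tagged stack frame. This sketch is not part of the commit: the function names @example and @use2, the choice of tag offsets, and the placement of the settag calls are hypothetical; only the intrinsic signatures and the irg.sp / tagp / settag division of labor are defined by the patch above.

; Hypothetical instrumented function: tag two 16-byte stack slots on entry,
; retag them with the stack (SP) tag before the frame is reused.
define void @example() {
entry:
  %a = alloca i8, i64 16, align 16
  %b = alloca i8, i64 16, align 16
  ; Tagged base pointer; lowered via IRGstack, whose explicit SP operand
  ; keeps it inside the prologue/epilogue region.
  %base = call i8* @llvm.aarch64.irg.sp(i64 0)
  ; Per-alloca tagged pointers; the compile-time offset from %base lets
  ; each of these select to a single ADDG.
  %a_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %a, i8* %base, i64 1)
  %b_t = call i8* @llvm.aarch64.tagp.p0i8(i8* %b, i8* %base, i64 2)
  ; Match memory tags to the pointer tags; small ranges unroll into
  ; STG/ST2G, ranges of kSetTagLoopThreshold (176) bytes and up become
  ; an STGloop/STZGloop pseudo expanded to a post-indexed loop.
  call void @llvm.aarch64.settag(i8* %a_t, i64 16)
  call void @llvm.aarch64.settag(i8* %b_t, i64 16)
  call void @use2(i8* %a_t, i8* %b_t)
  ; Retag with the SP tag on scope exit; the frame-index address lowers
  ; to [SP + offset], so SP itself serves as the tag source.
  call void @llvm.aarch64.settag(i8* %a, i64 16)
  call void @llvm.aarch64.settag(i8* %b, i64 16)
  ret void
}

declare i8* @llvm.aarch64.irg.sp(i64)
declare i8* @llvm.aarch64.tagp.p0i8(i8*, i8*, i64)
declare void @llvm.aarch64.settag(i8*, i64)
declare void @use2(i8*, i8*)

Compiled with llc -mtriple=aarch64 -mattr=+mte, this should produce irg/addg/stg sequences of the same shape as the CHECK lines in irg_sp_tagp.ll and settag.ll above.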