From 638f2cdc226c3ecd8371727f26e9cb8206f5cd99 Mon Sep 17 00:00:00 2001
From: Dean Michael Berris
Date: Mon, 8 May 2017 05:45:21 +0000
Subject: [PATCH] [XRay] Custom event logging intrinsic

This patch introduces an LLVM intrinsic and a target opcode for custom
event logging in XRay. Initially, its use case will be to allow users of
XRay to log some type of string ("poor man's printf"). The target opcode
compiles to a noop sled large enough to enable calling through to a
runtime-determined relative function call. At runtime, when XRay is
enabled, the sled is replaced by compiler-rt with a trampoline to the
logic for creating the custom log entries.

Future patches will implement the compiler-rt parts and the clang-side
support for emitting the IR corresponding to this intrinsic.

Reviewers: timshen, dberris

Subscribers: igorb, pelikan, rSerge, timshen, echristo, dberris, llvm-commits

Differential Revision: https://reviews.llvm.org/D27503

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@302405 91177308-0d34-0410-b5e6-96231b3b80d8
---
 include/llvm/CodeGen/AsmPrinter.h             |  1 +
 include/llvm/CodeGen/FastISel.h               |  1 +
 include/llvm/IR/Intrinsics.td                 |  8 ++
 include/llvm/Target/Target.td                 | 10 +++
 include/llvm/Target/TargetOpcodes.def         |  4 +
 lib/CodeGen/SelectionDAG/FastISel.cpp         | 22 +++++
 .../SelectionDAG/SelectionDAGBuilder.cpp      | 30 +++++++
 lib/Target/X86/X86AsmPrinter.h                |  1 +
 lib/Target/X86/X86ISelLowering.cpp            |  4 +
 lib/Target/X86/X86MCInstLower.cpp             | 80 +++++++++++++++++++
 test/CodeGen/X86/xray-custom-log.ll           | 23 ++++++
 11 files changed, 184 insertions(+)
 create mode 100644 test/CodeGen/X86/xray-custom-log.ll

diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h
index e008c7ceb06..180c0b57924 100644
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@@ -226,6 +226,7 @@ public:
     FUNCTION_EXIT = 1,
     TAIL_CALL = 2,
     LOG_ARGS_ENTER = 3,
+    CUSTOM_EVENT = 4,
   };
 
   // The table will contain these structs that point to the sled, the function
diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h
index 2abe3bb1155..57fa0c73d27 100644
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@@ -506,6 +506,7 @@ protected:
   bool selectCast(const User *I, unsigned Opcode);
   bool selectExtractValue(const User *I);
   bool selectInsertValue(const User *I);
+  bool selectXRayCustomEvent(const CallInst *II);
 
 private:
   /// \brief Handle PHI nodes in successor blocks.
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td
index cf7e5d8758a..7b78d4d3d34 100644
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -795,6 +795,14 @@ def int_type_checked_load : Intrinsic<[llvm_ptr_ty, llvm_i1_ty],
 def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty],
                                  [IntrReadMem, IntrArgMemOnly]>;
 
+// XRay intrinsics
+//===----------------------------------------------------------------------===//
+// Custom event logging for XRay.
+// Takes a pointer to a string and the length of the string.
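+// An illustrative IR call site (the names %str and %len are placeholders;
+// clang-side support for emitting this is left to a follow-up patch):
+//   call void @llvm.xray.customevent(i8* %str, i32 %len)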
+def int_xray_customevent : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty],
+                                     [NoCapture<0>, ReadOnly<0>, IntrWriteMem]>;
+//===----------------------------------------------------------------------===//
+
 //===------ Memory intrinsics with element-wise atomicity guarantees ------===//
 //
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index d7fbca93f59..fc35b4527bc 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -1002,6 +1002,16 @@ def PATCHABLE_TAIL_CALL : Instruction {
   let hasSideEffects = 1;
   let isReturn = 1;
 }
+def PATCHABLE_EVENT_CALL : Instruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins ptr_rc:$event, i8imm:$size);
+  let AsmString = "# XRay Custom Event Log.";
+  let usesCustomInserter = 1;
+  let isCall = 1;
+  let mayLoad = 1;
+  let mayStore = 1;
+  let hasSideEffects = 1;
+}
 def FENTRY_CALL : Instruction {
   let OutOperandList = (outs unknown:$dst);
   let InOperandList = (ins variable_ops);
diff --git a/include/llvm/Target/TargetOpcodes.def b/include/llvm/Target/TargetOpcodes.def
index 96db6e0a976..36764249632 100644
--- a/include/llvm/Target/TargetOpcodes.def
+++ b/include/llvm/Target/TargetOpcodes.def
@@ -182,6 +182,10 @@ HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_EXIT)
 /// PATCHABLE_RET which specifically only works for return instructions.
 HANDLE_TARGET_OPCODE(PATCHABLE_TAIL_CALL)
 
+/// Wraps a logging call and its arguments with nop sleds. At runtime, this can
+/// be patched to insert instrumentation instructions.
+HANDLE_TARGET_OPCODE(PATCHABLE_EVENT_CALL)
+
 /// The following generic opcodes are not supposed to appear after ISel.
 /// This is something we might want to relax, but for now, this is convenient
 /// to produce diagnostics.
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 6fb26fc3b73..8c98e3740f6 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -861,6 +861,25 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
   return true;
 }
 
+bool FastISel::selectXRayCustomEvent(const CallInst *I) {
+  const auto &Triple = TM.getTargetTriple();
+  if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+    return true; // don't do anything to this instruction.
+  SmallVector<MachineOperand, 8> Ops;
+  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
+                                          /*IsDef=*/false));
+  Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
+                                          /*IsDef=*/false));
+  MachineInstrBuilder MIB =
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+              TII.get(TargetOpcode::PATCHABLE_EVENT_CALL));
+  for (auto &MO : Ops)
+    MIB.add(MO);
+
+  // The Patchable Event Call instruction has been inserted; it gets lowered
+  // properly later.
+  return true;
+}
+
 /// Returns an AttributeList representing the attributes applied to the return
 /// value of the given call.
 static AttributeList getReturnAttrs(FastISel::CallLoweringInfo &CLI) {
@@ -1252,6 +1271,9 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
   case Intrinsic::experimental_patchpoint_void:
   case Intrinsic::experimental_patchpoint_i64:
     return selectPatchpoint(II);
+
+  case Intrinsic::xray_customevent:
+    return selectXRayCustomEvent(II);
   }
 
   return fastLowerIntrinsicCall(II);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 979c2fc74de..50313e2da88 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5703,7 +5703,37 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
     setValue(&I, N);
     return nullptr;
   }
+  case Intrinsic::xray_customevent: {
+    // Here we want to make sure that the intrinsic behaves as if it has a
+    // specific calling convention, and only for x86_64.
+    // FIXME: Support other platforms later.
+    const auto &Triple = DAG.getTarget().getTargetTriple();
+    if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
+      return nullptr;
+
+    SDLoc DL = getCurSDLoc();
+    SmallVector<SDValue, 8> Ops;
+
+    // We want to say that we always want the arguments in registers.
+    SDValue LogEntryVal = getValue(I.getArgOperand(0));
+    SDValue StrSizeVal = getValue(I.getArgOperand(1));
+    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+    SDValue Chain = getRoot();
+    Ops.push_back(LogEntryVal);
+    Ops.push_back(StrSizeVal);
+    Ops.push_back(Chain);
+
+    // We need to enforce the calling convention for the callsite, so that
+    // argument ordering is enforced correctly, and so that register
+    // allocation can see which registers may be clobbered and must be
+    // preserved across calls to the intrinsic.
+    MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHABLE_EVENT_CALL,
+                                           DL, NodeTys, Ops);
+    SDValue patchableNode = SDValue(MN, 0);
+    DAG.setRoot(patchableNode);
+    setValue(&I, patchableNode);
+    return nullptr;
+  }
   case Intrinsic::experimental_deoptimize:
     LowerDeoptimizeCall(&I);
     return nullptr;
diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h
index 44bc373b039..d7c3b74d3ef 100644
--- a/lib/Target/X86/X86AsmPrinter.h
+++ b/lib/Target/X86/X86AsmPrinter.h
@@ -91,6 +91,7 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
                           X86MCInstLower &MCIL);
   void LowerPATCHABLE_RET(const MachineInstr &MI, X86MCInstLower &MCIL);
   void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
+  void LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
 
   void LowerFENTRY_CALL(const MachineInstr &MI, X86MCInstLower &MCIL);
 
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 8d57b8eecab..9ee2234595f 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -26517,6 +26517,10 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   case TargetOpcode::STACKMAP:
   case TargetOpcode::PATCHPOINT:
     return emitPatchPoint(MI, BB);
+
+  case TargetOpcode::PATCHABLE_EVENT_CALL:
+    // Do nothing here, handled in the XRay instrumentation pass.
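+    // (The sled itself is emitted later, at MC lowering time, in
+    // X86AsmPrinter::LowerPATCHABLE_EVENT_CALL.)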
+    return BB;
 
   case X86::LCMPXCHG8B: {
     const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 550e3543a71..598d88d8b9c 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -1040,6 +1040,83 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
                       getSubtargetInfo());
 }
 
+void X86AsmPrinter::LowerPATCHABLE_EVENT_CALL(const MachineInstr &MI,
+                                              X86MCInstLower &MCIL) {
+  assert(Subtarget->is64Bit() && "XRay custom events only supported on X86-64");
+
+  // We want to emit the following pattern, which follows the x86 calling
+  // convention to prepare for the trampoline call to be patched in.
+  //
+  //   <args placement according to the SysV64 calling convention>
+  //   .p2align 1, ...
+  // .Lxray_event_sled_N:
+  //   jmp +N                        // jump across the call instruction
+  //   callq __xray_CustomEvent      // force relocation to symbol
+  //   <args cleanup, jump to here>
+  //
+  // The relative jump needs to jump forward 0x14 (20) bytes:
+  // 1 (push %rax) + 6 (args in %rdi/%rsi) + 10 (movabs) + 2 (call) + 1 (pop)
+  //
+  // After patching, it would look something like:
+  //
+  //   nopw (2-byte nop)
+  //   callq __xray_CustomEvent  // already lowered
+  //
+  // ---
+  // First we emit the label and the jump.
+  auto CurSled = OutContext.createTempSymbol("xray_event_sled_", true);
+  OutStreamer->AddComment("# XRay Custom Event Log");
+  OutStreamer->EmitCodeAlignment(2);
+  OutStreamer->EmitLabel(CurSled);
+
+  // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset
+  // as an operand (computed as an offset from the jmp instruction).
+  // FIXME: Find another less hacky way to force the relative jump.
+  OutStreamer->EmitBytes("\xeb\x14");
+
+  // The default C calling convention will place two arguments into %rdi and
+  // %rsi -- so we only work with those.
+  unsigned UsedRegs[] = {X86::RDI, X86::RSI, X86::RAX};
+
+  // Because we will use %rax, we preserve that across the call.
+  EmitAndCountInstruction(MCInstBuilder(X86::PUSH64r).addReg(X86::RAX));
+
+  // Then we put the operands in the %rdi and %rsi registers.
+  for (unsigned I = 0; I < MI.getNumOperands(); ++I)
+    if (auto Op = MCIL.LowerMachineOperand(&MI, MI.getOperand(I))) {
+      if (Op->isImm())
+        EmitAndCountInstruction(MCInstBuilder(X86::MOV64ri)
+                                    .addReg(UsedRegs[I])
+                                    .addImm(Op->getImm()));
+      else if (Op->isReg()) {
+        if (Op->getReg() != UsedRegs[I])
+          EmitAndCountInstruction(MCInstBuilder(X86::MOV64rr)
+                                      .addReg(UsedRegs[I])
+                                      .addReg(Op->getReg()));
+        else
+          EmitNops(*OutStreamer, 3, Subtarget->is64Bit(), getSubtargetInfo());
+      }
+    }
+
+  // We emit a hard dependency on the __xray_CustomEvent symbol, which is the
+  // name of the trampoline to be implemented by the XRay runtime. We put this
+  // explicitly in the %rax register.
+  auto TSym = OutContext.getOrCreateSymbol("__xray_CustomEvent");
+  MachineOperand TOp = MachineOperand::CreateMCSymbol(TSym);
+  EmitAndCountInstruction(MCInstBuilder(X86::MOV64ri)
+                              .addReg(X86::RAX)
+                              .addOperand(MCIL.LowerSymbolOperand(TOp, TSym)));
+
+  // Emit the call instruction.
+  EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(X86::RAX));
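+
+  // Note: when XRay is enabled at runtime, compiler-rt patches the two-byte
+  // jmp at the head of this sled into a two-byte nop, so execution falls
+  // through into the call to the __xray_CustomEvent trampoline above.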
+
+  // Restore caller-saved and used registers.
+  OutStreamer->AddComment("xray custom event end.");
+  EmitAndCountInstruction(MCInstBuilder(X86::POP64r).addReg(X86::RAX));
+
+  recordSled(CurSled, MI, SledKind::CUSTOM_EVENT);
+}
+
 void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
                                                   X86MCInstLower &MCIL) {
   // We want to emit the following pattern:
@@ -1415,6 +1492,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
 
   case TargetOpcode::PATCHABLE_TAIL_CALL:
     return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);
+
+  case TargetOpcode::PATCHABLE_EVENT_CALL:
+    return LowerPATCHABLE_EVENT_CALL(*MI, MCInstLowering);
 
   case X86::MORESTACK_RET:
     EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
diff --git a/test/CodeGen/X86/xray-custom-log.ll b/test/CodeGen/X86/xray-custom-log.ll
new file mode 100644
index 00000000000..63625d44b4c
--- /dev/null
+++ b/test/CodeGen/X86/xray-custom-log.ll
@@ -0,0 +1,23 @@
+; RUN: llc -filetype=asm -o - -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
+
+define i32 @fn() nounwind noinline uwtable "function-instrument"="xray-always" {
+    %eventptr = alloca i8
+    %eventsize = alloca i32
+    store i32 3, i32* %eventsize
+    %val = load i32, i32* %eventsize
+    call void @llvm.xray.customevent(i8* %eventptr, i32 %val)
+    ; CHECK-LABEL: Lxray_event_sled_0:
+    ; CHECK-NEXT: .ascii "\353\024
+    ; CHECK-NEXT: pushq %rax
+    ; CHECK-NEXT: movq {{.*}}, %rdi
+    ; CHECK-NEXT: movq {{.*}}, %rsi
+    ; CHECK-NEXT: movabsq $__xray_CustomEvent, %rax
+    ; CHECK-NEXT: callq *%rax
+    ; CHECK-NEXT: popq %rax
+    ret i32 0
+}
+; CHECK: .section {{.*}}xray_instr_map
+; CHECK-LABEL: Lxray_synthetic_0:
+; CHECK: .quad {{.*}}xray_event_sled_0
+
+declare void @llvm.xray.customevent(i8*, i32)
-- 
2.40.0