From c7506ed12457e5a24b35967d65b138eeb787582b Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Mon, 27 Mar 2017 15:52:38 +0000 Subject: [PATCH] [AArch64] Mark mrs of TPIDR_EL0 (thread pointer) as not having side effects. Among other things, this allows Machine LICM to hoist a costly 'mrs' instruction from within a loop. Differential Revision: http://reviews.llvm.org/D31151 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@298851 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../AArch64/AArch64ExpandPseudoInsts.cpp | 8 +++ lib/Target/AArch64/AArch64InstrInfo.td | 6 +- test/CodeGen/AArch64/thread-pointer.ll | 60 +++++++++++++++++++ 3 files changed, 72 insertions(+), 2 deletions(-) create mode 100644 test/CodeGen/AArch64/thread-pointer.ll diff --git a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index 5b8a1e05d14..e4e0e3c854a 100644 --- a/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -17,6 +17,7 @@ #include "MCTargetDesc/AArch64AddressingModes.h" #include "AArch64InstrInfo.h" #include "AArch64Subtarget.h" +#include "Utils/AArch64BaseInfo.h" #include "llvm/CodeGen/LivePhysRegs.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -889,6 +890,13 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, MI.eraseFromParent(); return true; } + case AArch64::MOVbaseTLS: { + unsigned DstReg = MI.getOperand(0).getReg(); + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg) + .addImm(AArch64SysReg::TPIDR_EL0); + MI.eraseFromParent(); + return true; + } case AArch64::MOVi32imm: return expandMOVImm(MBB, MBBI, 32); diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index af0b16411b8..6a6dba34606 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -426,8 +426,10 @@ def MSRpstateImm1 : MSRpstateImm0_1; def 
MSRpstateImm4 : MSRpstateImm0_15; // The thread pointer (on Linux, at least, where this has been implemented) is -// TPIDR_EL0. -def : Pat<(AArch64threadpointer), (MRS 0xde82)>; +// TPIDR_EL0. Add pseudo op so we can mark it as not having any side effects. +let hasSideEffects = 0 in +def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins), + [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[]>; // The cycle counter PMC register is PMCCNTR_EL0. let Predicates = [HasPerfMon] in diff --git a/test/CodeGen/AArch64/thread-pointer.ll b/test/CodeGen/AArch64/thread-pointer.ll new file mode 100644 index 00000000000..91585791a58 --- /dev/null +++ b/test/CodeGen/AArch64/thread-pointer.ll @@ -0,0 +1,60 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s + +@x = thread_local local_unnamed_addr global i32 0, align 4 +@y = thread_local local_unnamed_addr global i32 0, align 4 + +; Machine LICM should hoist the mrs into the loop preheader. +; CHECK-LABEL: @test1 +; CHECK: BB#1: +; CHECK: mrs x[[BASE:[0-9]+]], TPIDR_EL0 +; CHECK: add x[[REG1:[0-9]+]], x[[BASE]], :tprel_hi12:x +; CHECK: add x[[REG2:[0-9]+]], x[[REG1]], :tprel_lo12_nc:x +; +; CHECK: .LBB0_2: +; CHECK: ldr w0, [x[[REG2]]] +; CHECK: bl bar +; CHECK: sub w[[REG3:[0-9]+]], w{{[0-9]+}}, #1 +; CHECK: cbnz w[[REG3]], .LBB0_2 + +define void @test1(i32 %n) local_unnamed_addr { +entry: + %cmp3 = icmp sgt i32 %n, 0 + br i1 %cmp3, label %bb1, label %bb2 + +bb1: + br label %for.body + +for.body: + %i.04 = phi i32 [ %inc, %for.body ], [ 0, %bb1 ] + %0 = load i32, i32* @x, align 4 + tail call void @bar(i32 %0) #2 + %inc = add nuw nsw i32 %i.04, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %bb2, label %for.body + +bb2: + ret void +} + +; Machine CSE should combine the mrs between the load of %x and %y. 
+; CHECK-LABEL: @test2 +; CHECK: mrs x{{[0-9]+}}, TPIDR_EL0 +; CHECK-NOT: mrs x{{[0-9]+}}, TPIDR_EL0 +; CHECK: ret +define void @test2(i32 %c) local_unnamed_addr #0 { +entry: + %0 = load i32, i32* @x, align 4 + tail call void @bar(i32 %0) #2 + %cmp = icmp eq i32 %c, 0 + br i1 %cmp, label %if.end, label %if.then + +if.then: + %1 = load i32, i32* @y, align 4 + tail call void @bar(i32 %1) #2 + br label %if.end + +if.end: + ret void +} + +declare void @bar(i32) local_unnamed_addr -- 2.40.0