#include "MCTargetDesc/AArch64AddressingModes.h"
#include "AArch64InstrInfo.h"
#include "AArch64Subtarget.h"
+#include "Utils/AArch64BaseInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
MI.eraseFromParent();
return true;
}
+ case AArch64::MOVbaseTLS: {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
+ .addImm(AArch64SysReg::TPIDR_EL0);
+ MI.eraseFromParent();
+ return true;
+ }
case AArch64::MOVi32imm:
return expandMOVImm(MBB, MBBI, 32);
def MSRpstateImm4 : MSRpstateImm0_15;
// The thread pointer (on Linux, at least, where this has been implemented) is
-// TPIDR_EL0.
-def : Pat<(AArch64threadpointer), (MRS 0xde82)>;
+// TPIDR_EL0. Add pseudo op so we can mark it as not having any side effects.
+let hasSideEffects = 0 in
+def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
+ [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[]>;
// The cycle counter PMC register is PMCCNTR_EL0.
let Predicates = [HasPerfMon] in
--- /dev/null
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -o - %s | FileCheck %s
+
+@x = thread_local local_unnamed_addr global i32 0, align 4
+@y = thread_local local_unnamed_addr global i32 0, align 4
+
+; Machine LICM should hoist the mrs into the loop preheader.
+; CHECK-LABEL: @test1
+; CHECK: BB#1:
+; CHECK: mrs x[[BASE:[0-9]+]], TPIDR_EL0
+; CHECK: add x[[REG1:[0-9]+]], x[[BASE]], :tprel_hi12:x
+; CHECK: add x[[REG2:[0-9]+]], x[[REG1]], :tprel_lo12_nc:x
+;
+; CHECK: .LBB0_2:
+; CHECK: ldr w0, [x[[REG2]]]
+; CHECK: bl bar
+; CHECK: sub w[[REG3:[0-9]+]], w{{[0-9]+}}, #1
+; CHECK: cbnz w[[REG3]], .LBB0_2
+
+define void @test1(i32 %n) local_unnamed_addr {
+entry:
+ %cmp3 = icmp sgt i32 %n, 0
+ br i1 %cmp3, label %bb1, label %bb2
+
+bb1:
+ br label %for.body
+
+for.body:
+ %i.04 = phi i32 [ %inc, %for.body ], [ 0, %bb1 ]
+ %0 = load i32, i32* @x, align 4
+ tail call void @bar(i32 %0) #2
+ %inc = add nuw nsw i32 %i.04, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %bb2, label %for.body
+
+bb2:
+ ret void
+}
+
+; Machine CSE should combine the the mrs between the load of %x and %y.
+; CHECK-LABEL: @test2
+; CHECK: mrs x{{[0-9]+}}, TPIDR_EL0
+; CHECK-NOT: mrs x{{[0-9]+}}, TPIDR_EL0
+; CHECK: ret
+define void @test2(i32 %c) local_unnamed_addr #0 {
+entry:
+ %0 = load i32, i32* @x, align 4
+ tail call void @bar(i32 %0) #2
+ %cmp = icmp eq i32 %c, 0
+ br i1 %cmp, label %if.end, label %if.then
+
+if.then:
+ %1 = load i32, i32* @y, align 4
+ tail call void @bar(i32 %1) #2
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+declare void @bar(i32) local_unnamed_addr