------------------------------------------------------------------------
r353308 | tnorthover | 2019-02-06 16:26:35 +0100 (Wed, 06 Feb 2019) | 5 lines
AArch64: enforce even/odd register pairs for CASP instructions.
ARMv8.1a CASP instructions need the first register of the pair to be even
(otherwise the encoding is unallocated). We already enforced this during
assembly, but not during CodeGen until now.
------------------------------------------------------------------------
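To make the constraint concrete: CASP reads and writes the pairs <Rs, Rs+1>
and <Rt, Rt+1>, so an odd first register leaves the encoding unallocated. A
minimal sketch of the rule, assuming a plain integer register encoding (a
hypothetical helper, not code from the patch):

    // Sketch: only even first registers give an allocated CASP encoding,
    // e.g. the pair x0_x1 is encodable but x1_x2 is not.
    static bool isEncodableCaspPair(unsigned FirstRegEncoding) {
      return (FirstRegEncoding % 2) == 0;
    }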
------------------------------------------------------------------------
r353383 | tnorthover | 2019-02-07 11:35:34 +0100 (Thu, 07 Feb 2019) | 4 lines
AArch64: implement copy for paired GPR registers.
When doing 128-bit atomics using CASP, we might need to copy a GPRPair to a
different register, but that was unimplemented until now.
------------------------------------------------------------------------
git-svn-id: https://llvm.org/svn/llvm-project/llvm/branches/release_80@353822 91177308-0d34-0410-b5e6-96231b3b80d8
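For context on the second change: no single AArch64 instruction moves a whole
GPR pair, so the patch below expands a pair copy into one move per lane. A
standalone sketch of that decomposition, independent of the LLVM machinery in
the actual diff (the struct and function here are illustrative only):

    #include <cstdint>

    // A 128-bit pair copy decomposes into two independent 64-bit lane copies;
    // each lane lowers to the canonical register move "orr xd, xzr, xs".
    struct GPRPair { uint64_t Even, Odd; };

    static void copyPair(GPRPair &Dst, const GPRPair &Src) {
      Dst.Even = Src.Even; // orr xDstEven, xzr, xSrcEven
      Dst.Odd = Src.Odd;   // orr xDstOdd,  xzr, xSrcOdd
    }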
  }
}
+void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
+                                       MachineBasicBlock::iterator I,
+                                       DebugLoc DL, unsigned DestReg,
+                                       unsigned SrcReg, bool KillSrc,
+                                       unsigned Opcode, unsigned ZeroReg,
+                                       llvm::ArrayRef<unsigned> Indices) const {
+  const TargetRegisterInfo *TRI = &getRegisterInfo();
+  unsigned NumRegs = Indices.size();
+
+#ifndef NDEBUG
+  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
+  uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
+  assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
+         "GPR reg sequences should not be able to overlap");
+#endif
+
+  for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
+    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
+    AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
+    MIB.addReg(ZeroReg);
+    AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
+    MIB.addImm(0);
+  }
+}
+
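A note on the opcode choice above: "mov Xd, Xm" is an alias of
"orr Xd, xzr, Xm" on AArch64, so each lane of the pair is copied with
ORRXrs/ORRWrs against the zero register, and the trailing addImm(0) supplies
the unused shift amount of the shifted-register form.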
void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I,
                                   const DebugLoc &DL, unsigned DestReg,
    return;
  }
+  if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
+      AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
+    static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
+    copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
+                    AArch64::XZR, Indices);
+    return;
+  }
+
+  if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
+      AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
+    static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
+    copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
+                    AArch64::WZR, Indices);
+    return;
+  }
+
  if (AArch64::FPR128RegClass.contains(DestReg) &&
      AArch64::FPR128RegClass.contains(SrcReg)) {
    if (Subtarget.hasNEON()) {
                        const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
                        bool KillSrc, unsigned Opcode,
                        llvm::ArrayRef<unsigned> Indices) const;
+  void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                       DebugLoc DL, unsigned DestReg, unsigned SrcReg,
+                       bool KillSrc, unsigned Opcode, unsigned ZeroReg,
+                       llvm::ArrayRef<unsigned> Indices) const;
  void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                   const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
                   bool KillSrc) const override;
// ARMv8.1a atomic CASP register operands
-def WSeqPairs : RegisterTuples<[sube32, subo32],
-                               [(rotl GPR32, 0), (rotl GPR32, 1)]>;
-def XSeqPairs : RegisterTuples<[sube64, subo64],
-                               [(rotl GPR64, 0), (rotl GPR64, 1)]>;
+def WSeqPairs : RegisterTuples<[sube32, subo32],
+                               [(decimate (rotl GPR32, 0), 2),
+                                (decimate (rotl GPR32, 1), 2)]>;
+def XSeqPairs : RegisterTuples<[sube64, subo64],
+                               [(decimate (rotl GPR64, 0), 2),
+                                (decimate (rotl GPR64, 1), 2)]>;
def WSeqPairsClass : RegisterClass<"AArch64", [untyped], 32,
                                   (add WSeqPairs)>{
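The decimate operator keeps every Nth register of its input list, so the new
definitions pair each even register with its successor and drop the
odd-started tuples: (rotl GPR32, 0) supplies w0, w1, w2, ..., (rotl GPR32, 1)
supplies w1, w2, w3, ..., and decimating both by 2 before zipping yields
(w0, w1), (w2, w3), and so on. A rough standalone illustration of the
resulting pair list (plain C++, not the TableGen semantics verbatim):

    #include <utility>
    #include <vector>

    // Enumerate <even, even+1> pairs the way the decimated tuples do.
    // NumRegs stands in for the size of the underlying register class.
    static std::vector<std::pair<unsigned, unsigned>>
    seqPairs(unsigned NumRegs) {
      std::vector<std::pair<unsigned, unsigned>> Pairs;
      for (unsigned R = 0; R + 1 < NumRegs; R += 2) // decimate by 2
        Pairs.push_back({R, R + 1});                // zip with rotl-by-1 list
      return Pairs;
    }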
  if (RegNo & 0x1)
    return Fail;
-  unsigned Register = AArch64MCRegisterClasses[RegClassID].getRegister(RegNo);
-  Inst.addOperand(MCOperand::createReg(Register));
+  unsigned Reg = AArch64MCRegisterClasses[RegClassID].getRegister(RegNo / 2);
+  Inst.addOperand(MCOperand::createReg(Reg));
  return Success;
}
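Since the tuple classes now contain only the even-started pairs, the class has
half as many entries as before, so the raw register field is halved before
indexing it, and odd field values are rejected up front as unallocated
encodings. A standalone illustration of the mapping (not the LLVM decoder
itself):

    // Raw field E (even) selects tuple E / 2, i.e. the pair <xE, x(E+1)>:
    // 0 -> x0_x1, 2 -> x2_x3, 4 -> x4_x5, ...; odd fields decode to nothing.
    static int pairIndexForEncoding(unsigned RegNo) {
      if (RegNo & 0x1)
        return -1; // unallocated encoding
      return RegNo / 2;
    }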
--- /dev/null
+; RUN: llc -mtriple arm64-apple-ios -mattr=+lse %s -o - | FileCheck %s
+
+; Only "even,even+1" pairs are valid for CASP instructions. Make sure LLVM
+; doesn't allocate odd ones and that it can copy them around properly. N.b. we
+; don't actually check that they're sequential because FileCheck can't; odd/even
+; will have to be good enough.
+define void @test_atomic_cmpxchg_i128_register_shuffling(i128* %addr, i128 %desired, i128 %new) nounwind {
+; CHECK-LABEL: test_atomic_cmpxchg_i128_register_shuffling:
+; CHECK-DAG: mov [[DESIRED_LO:x[0-9]*[02468]]], x2
+; CHECK-DAG: mov [[DESIRED_HI:x[0-9]*[13579]]], x3
+; CHECK-DAG: mov [[NEW_LO:x[0-9]*[02468]]], x4
+; CHECK-DAG: mov [[NEW_HI:x[0-9]*[13579]]], x5
+; CHECK: caspal [[DESIRED_LO]], [[DESIRED_HI]], [[NEW_LO]], [[NEW_HI]], [x0]
+
+  %res = cmpxchg i128* %addr, i128 %desired, i128 %new seq_cst seq_cst
+  ret void
+}
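For context on the registers being shuffled: under AAPCS64 a 128-bit integer
argument is passed in a consecutive register pair starting at an even-numbered
register, so %addr arrives in x0, %desired in x2:x3, and %new in x4:x5; the
moves checked above shuffle those values into fresh even/odd pairs for the
caspal operands.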
--- /dev/null
+# RUN: llc -o - %s -mtriple=aarch64-- -mattr=+v8.1a -run-pass=postrapseudos | FileCheck %s
+---
+# CHECK-LABEL: name: copy_xseqpairs
+name: copy_xseqpairs
+body: |
+  bb.0:
+    ; CHECK: $x4_x5 = CASPALX $x4_x5, $x2_x3, $x0
+    ; CHECK: $x0 = ORRXrs $xzr, $x4, 0
+    ; CHECK: $x1 = ORRXrs $xzr, $x5, 0
+    $x4_x5 = CASPALX $x4_x5, $x2_x3, $x0
+    $x0_x1 = COPY $x4_x5
+...
+---
+# CHECK-LABEL: name: copy_wseqpairs
+name: copy_wseqpairs
+body: |
+  bb.0:
+    ; CHECK: $w4_w5 = CASPALW $w4_w5, $w2_w3, $x0
+    ; CHECK: $w0 = ORRWrs $wzr, $w4, 0
+    ; CHECK: $w1 = ORRWrs $wzr, $w5, 0
+    $w4_w5 = CASPALW $w4_w5, $w2_w3, $x0
+    $w0_w1 = COPY $w4_w5
+...