From ab28f3b39e53596ec660fdcb3f9c7be922355003 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Wed, 12 Apr 2017 21:58:23 +0000 Subject: [PATCH] AMDGPU: Fix invalid copies when copying i1 to phys reg Insert a VReg_1 virtual register so the i1 workaround pass can handle it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@300113 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 2 +- lib/Target/AMDGPU/SIISelLowering.cpp | 30 ++++++++++++++++++-- lib/Target/AMDGPU/SIISelLowering.h | 2 +- test/CodeGen/AMDGPU/inline-asm.ll | 36 ++++++++++++++++++++++++ 4 files changed, 66 insertions(+), 4 deletions(-) diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index fc3ea67fad0..318de7f2e3d 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -563,7 +563,7 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { case ISD::CopyToReg: { const SITargetLowering& Lowering = *static_cast<const SITargetLowering*>(getTargetLowering()); - Lowering.legalizeTargetIndependentNode(N, *CurDAG); + N = Lowering.legalizeTargetIndependentNode(N, *CurDAG); break; } case ISD::AND: diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index eda825d8c6e..7268131396d 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4885,8 +4885,33 @@ static bool isFrameIndexOp(SDValue Op) { /// \brief Legalize target independent instructions (e.g. INSERT_SUBREG) /// with frame index operands. /// LLVM assumes that inputs are to these instructions are registers. 
-void SITargetLowering::legalizeTargetIndependentNode(SDNode *Node, - SelectionDAG &DAG) const { +SDNode *SITargetLowering::legalizeTargetIndependentNode(SDNode *Node, + SelectionDAG &DAG) const { + if (Node->getOpcode() == ISD::CopyToReg) { + RegisterSDNode *DestReg = cast<RegisterSDNode>(Node->getOperand(1)); + SDValue SrcVal = Node->getOperand(2); + + // Insert a copy to a VReg_1 virtual register so LowerI1Copies doesn't have + // to try understanding copies to physical registers. + if (SrcVal.getValueType() == MVT::i1 && + TargetRegisterInfo::isPhysicalRegister(DestReg->getReg())) { + SDLoc SL(Node); + MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); + SDValue VReg = DAG.getRegister( + MRI.createVirtualRegister(&AMDGPU::VReg_1RegClass), MVT::i1); + + SDNode *Glued = Node->getGluedNode(); + SDValue ToVReg + = DAG.getCopyToReg(Node->getOperand(0), SL, VReg, SrcVal, + SDValue(Glued, Glued ? Glued->getNumValues() - 1 : 0)); + SDValue ToResultReg + = DAG.getCopyToReg(ToVReg, SL, SDValue(DestReg, 0), + VReg, ToVReg.getValue(1)); + DAG.ReplaceAllUsesWith(Node, ToResultReg.getNode()); + DAG.RemoveDeadNode(Node); + return ToResultReg.getNode(); + } + } SmallVector<SDValue, 8> Ops; for (unsigned i = 0; i < Node->getNumOperands(); ++i) { @@ -4902,6 +4927,7 @@ void SITargetLowering::legalizeTargetIndependentNode(SDNode *Node, } DAG.UpdateNodeOperands(Node, Ops); + return Node; } /// \brief Fold the instructions after selecting them. 
diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h index 452ee684ef6..d177777ad5e 100644 --- a/lib/Target/AMDGPU/SIISelLowering.h +++ b/lib/Target/AMDGPU/SIISelLowering.h @@ -206,7 +206,7 @@ public: SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, unsigned Reg, EVT VT) const override; - void legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const; + SDNode *legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const; MachineSDNode *wrapAddr64Rsrc(SelectionDAG &DAG, const SDLoc &DL, SDValue Ptr) const; diff --git a/test/CodeGen/AMDGPU/inline-asm.ll b/test/CodeGen/AMDGPU/inline-asm.ll index 5d49b11f0d4..0d7e07b9a62 100644 --- a/test/CodeGen/AMDGPU/inline-asm.ll +++ b/test/CodeGen/AMDGPU/inline-asm.ll @@ -196,3 +196,39 @@ entry: call void asm sideeffect "; use $0 ", "{VGPR0_VGPR1}"(i64 123456) ret void } + +; CHECK-LABEL: {{^}}i1_imm_input_phys_vgpr: +; CHECK: v_mov_b32_e32 v0, -1{{$}} +; CHECK: ; use v0 +define amdgpu_kernel void @i1_imm_input_phys_vgpr() { +entry: + call void asm sideeffect "; use $0 ", "{VGPR0}"(i1 true) + ret void +} + +; CHECK-LABEL: {{^}}i1_input_phys_vgpr: +; CHECK: {{buffer|flat}}_load_ubyte [[LOAD:v[0-9]+]] +; CHECK: v_and_b32_e32 [[LOAD]], 1, [[LOAD]] +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, [[LOAD]] +; CHECK-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc +; CHECK: ; use v0 +define amdgpu_kernel void @i1_input_phys_vgpr() { +entry: + %val = load i1, i1 addrspace(1)* undef + call void asm sideeffect "; use $0 ", "{VGPR0}"(i1 %val) + ret void +} + +; FIXME: Should be scheduled to shrink vcc +; CHECK-LABEL: {{^}}i1_input_phys_vgpr_x2: +; CHECK: v_cmp_eq_u32_e32 vcc, 1, v0 +; CHECK: v_cmp_eq_u32_e64 s[0:1], 1, v1 +; CHECK: v_cndmask_b32_e64 v0, 0, -1, vcc +; CHECK: v_cndmask_b32_e64 v1, 0, -1, s[0:1] +define amdgpu_kernel void @i1_input_phys_vgpr_x2() { +entry: + %val0 = load volatile i1, i1 addrspace(1)* undef + %val1 = load volatile i1, i1 addrspace(1)* undef + call 
void asm sideeffect "; use $0 $1 ", "{VGPR0}, {VGPR1}"(i1 %val0, i1 %val1) + ret void +} -- 2.50.1