From 900bd7250eec2e6659dd8490028b8233fb61e88e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Tue, 1 Oct 2019 01:44:46 +0000 Subject: [PATCH] AMDGPU/GlobalISel: Avoid creating shift of 0 in arg lowering This is sort of papering over the fact that we don't run a combiner anywhere, but avoiding creating 2 instructions in the first place is easy. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@373293 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 11 ++++++++--- .../AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 40d95dcef0c..e289e8e689a 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1717,9 +1717,14 @@ bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B, const unsigned Mask = Arg->getMask(); const unsigned Shift = countTrailingZeros(Mask); - auto ShiftAmt = B.buildConstant(S32, Shift); - auto LShr = B.buildLShr(S32, LiveIn, ShiftAmt); - B.buildAnd(DstReg, LShr, B.buildConstant(S32, Mask >> Shift)); + Register AndMaskSrc = LiveIn; + + if (Shift != 0) { + auto ShiftAmt = B.buildConstant(S32, Shift); + AndMaskSrc = B.buildLShr(S32, LiveIn, ShiftAmt).getReg(0); + } + + B.buildAnd(DstReg, AndMaskSrc, B.buildConstant(S32, Mask >> Shift)); } else B.buildCopy(DstReg, LiveIn); diff --git a/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll b/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll index fb5f881ad85..946a138bee4 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll +++ b/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll @@ -89,7 +89,7 @@ bb2: } ; ALL-LABEL: {{^}}test_workitem_id_x_func: -; ALL: v_lshrrev_b32_e32 v2, 0, v2 +; ALL: s_waitcnt ; ALL-NEXT: v_and_b32_e32 v2, 0x3ff, v2 define void @test_workitem_id_x_func(i32 addrspace(1)* %out) #1 { %id = call i32 @llvm.amdgcn.workitem.id.x() -- 2.40.0