From: Nicolai Haehnle Date: Tue, 8 Oct 2019 12:46:32 +0000 (+0000) Subject: AMDGPU: Propagate undef flag during pre-RA exec mask optimizations X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c54901d36071b8c03c9428b7d14c29d33a460d0a;p=llvm AMDGPU: Propagate undef flag during pre-RA exec mask optimizations Summary: Issue: https://github.com/GPUOpen-Drivers/llpc/issues/204 Reviewers: arsenm, rampitec Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D68184 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@374041 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp b/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp index 681c3b35f75..fdd30db6a7c 100644 --- a/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp +++ b/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp @@ -250,15 +250,16 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB, Op1->getImm() != 0 || Op2->getImm() != 1) return AMDGPU::NoRegister; - LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' - << *Cmp << '\t' << *And); + LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' << *Cmp << '\t' + << *And); Register CCReg = CC->getReg(); LIS->RemoveMachineInstrFromMaps(*And); - MachineInstr *Andn2 = BuildMI(MBB, *And, And->getDebugLoc(), - TII->get(Andn2Opc), And->getOperand(0).getReg()) - .addReg(ExecReg) - .addReg(CCReg, 0, CC->getSubReg()); + MachineInstr *Andn2 = + BuildMI(MBB, *And, And->getDebugLoc(), TII->get(Andn2Opc), + And->getOperand(0).getReg()) + .addReg(ExecReg) + .addReg(CCReg, getUndefRegState(CC->isUndef()), CC->getSubReg()); And->eraseFromParent(); LIS->InsertMachineInstrInMaps(*Andn2); diff --git a/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir b/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir index a2fec4d298b..0ea085afc40 100644 --- a/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir +++ b/test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -run-pass=si-optimize-exec-masking-pre-ra %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -run-pass=si-optimize-exec-masking-pre-ra -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s # Check for regression from assuming an instruction was a copy after # dropping the opcode check. @@ -95,3 +95,26 @@ body: | $exec = S_OR_B64 $exec, %7, implicit-def $scc ... + +# When folding a v_cndmask and a v_cmp in a pattern leading to +# s_cbranch_vccz, ensure that an undef operand is handled correctly. +--- +name: cndmask_cmp_cbranch_fold_undef +tracksRegLiveness: true +body: | + ; GCN-LABEL: name: cndmask_cmp_cbranch_fold_undef + ; GCN: bb.0: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: $vcc = S_ANDN2_B64 $exec, undef %1:sreg_64_xexec, implicit-def $scc + ; GCN: S_CBRANCH_VCCZ %bb.1, implicit $vcc + ; GCN: bb.1: + bb.0: + + %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, undef %0:sreg_64_xexec, implicit $exec + V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec + $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc + S_CBRANCH_VCCZ %bb.1, implicit $vcc + + bb.1: + +...