granicus.if.org Git - llvm/commitdiff
AMDGPU: Propagate undef flag during pre-RA exec mask optimizations
author Nicolai Haehnle <nhaehnle@gmail.com>
Tue, 8 Oct 2019 12:46:32 +0000 (12:46 +0000)
committer Nicolai Haehnle <nhaehnle@gmail.com>
Tue, 8 Oct 2019 12:46:32 +0000 (12:46 +0000)
Summary: Issue: https://github.com/GPUOpen-Drivers/llpc/issues/204

Reviewers: arsenm, rampitec

Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D68184

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@374041 91177308-0d34-0410-b5e6-96231b3b80d8

lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
test/CodeGen/AMDGPU/optimize-exec-masking-pre-ra.mir

index 681c3b35f75deca435394bf36e7712302ab14d20..fdd30db6a7cb5c6937d77a663b7671642c647554 100644 (file)
@@ -250,15 +250,16 @@ static unsigned optimizeVcndVcmpPair(MachineBasicBlock &MBB,
       Op1->getImm() != 0 || Op2->getImm() != 1)
     return AMDGPU::NoRegister;
 
-  LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t'
-                    << *Cmp << '\t' << *And);
+  LLVM_DEBUG(dbgs() << "Folding sequence:\n\t" << *Sel << '\t' << *Cmp << '\t'
+                    << *And);
 
   Register CCReg = CC->getReg();
   LIS->RemoveMachineInstrFromMaps(*And);
-  MachineInstr *Andn2 = BuildMI(MBB, *And, And->getDebugLoc(),
-                                TII->get(Andn2Opc), And->getOperand(0).getReg())
-                            .addReg(ExecReg)
-                            .addReg(CCReg, 0, CC->getSubReg());
+  MachineInstr *Andn2 =
+      BuildMI(MBB, *And, And->getDebugLoc(), TII->get(Andn2Opc),
+              And->getOperand(0).getReg())
+          .addReg(ExecReg)
+          .addReg(CCReg, getUndefRegState(CC->isUndef()), CC->getSubReg());
   And->eraseFromParent();
   LIS->InsertMachineInstrInMaps(*Andn2);
 
index a2fec4d298b11c3e968435d61d9fe78d14a1cfd5..0ea085afc40510bba16d5acd0e696b5525cd6ba5 100644 (file)
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-mesa-mesa3d -run-pass=si-optimize-exec-masking-pre-ra %s -o - | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -run-pass=si-optimize-exec-masking-pre-ra -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
 
 # Check for regression from assuming an instruction was a copy after
 # dropping the opcode check.
@@ -95,3 +95,26 @@ body:             |
     $exec = S_OR_B64 $exec, %7, implicit-def $scc
 
 ...
+
+# When folding a v_cndmask and a v_cmp in a pattern leading to
+# s_cbranch_vccz, ensure that an undef operand is handled correctly.
+---
+name: cndmask_cmp_cbranch_fold_undef
+tracksRegLiveness: true
+body:             |
+  ; GCN-LABEL: name: cndmask_cmp_cbranch_fold_undef
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x80000000)
+  ; GCN:   $vcc = S_ANDN2_B64 $exec, undef %1:sreg_64_xexec, implicit-def $scc
+  ; GCN:   S_CBRANCH_VCCZ %bb.1, implicit $vcc
+  ; GCN: bb.1:
+  bb.0:
+
+    %1:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, undef %0:sreg_64_xexec, implicit $exec
+    V_CMP_NE_U32_e32 1, %1, implicit-def $vcc, implicit $exec
+    $vcc = S_AND_B64 $exec, $vcc, implicit-def dead $scc
+    S_CBRANCH_VCCZ %bb.1, implicit $vcc
+
+  bb.1:
+
+...