AMDGPU: Start selecting s_xnor_{b32, b64}

author Konstantin Zhuravlyov <kzhuravl_dev@outlook.com>

Mon, 18 Sep 2017 21:22:45 +0000 (21:22 +0000)

committer Konstantin Zhuravlyov <kzhuravl_dev@outlook.com>

Mon, 18 Sep 2017 21:22:45 +0000 (21:22 +0000)
author Konstantin Zhuravlyov <kzhuravl_dev@outlook.com>
Mon, 18 Sep 2017 21:22:45 +0000 (21:22 +0000)
committer Konstantin Zhuravlyov <kzhuravl_dev@outlook.com>
Mon, 18 Sep 2017 21:22:45 +0000 (21:22 +0000)
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp

index fd9a50f4f2c224ae940f2e34495a2c0da45f5a6a..075717e1f8b26025a4df7c3fddec514b67e1e4ed 100644 (file)
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3686,6 +3686,16 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
        movePackToVALU(Worklist, MRI, Inst);
        Inst.eraseFromParent();
        continue;
+
+    case AMDGPU::S_XNOR_B32:
+      lowerScalarXnor(Worklist, Inst);
+      Inst.eraseFromParent();
+      continue;
+
+    case AMDGPU::S_XNOR_B64:
+      splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XNOR_B32);
+      Inst.eraseFromParent();
+      continue;
      }
  
      if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
@@ -3804,6 +3814,33 @@ void SIInstrInfo::lowerScalarAbs(SetVectorType &Worklist,
    addUsersToMoveToVALUWorklist(ResultReg, MRI, Worklist);
  }
  
+void SIInstrInfo::lowerScalarXnor(SetVectorType &Worklist, // Lowers a scalar XNOR (called for S_XNOR_B32 from moveToVALU) to V_XOR_B32 followed by V_NOT_B32.
+                                  MachineInstr &Inst) const { // Inst is not erased here; the moveToVALU case erases it after this returns.
+  MachineBasicBlock &MBB = *Inst.getParent();
+  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+  MachineBasicBlock::iterator MII = Inst; // Insertion point: the replacement instructions are built before Inst.
+  const DebugLoc &DL = Inst.getDebugLoc(); // Reuse Inst's debug location for the new instructions.
+
+  MachineOperand &Dest = Inst.getOperand(0); // Operand 0 is the destination, operands 1 and 2 are the two sources.
+  MachineOperand &Src0 = Inst.getOperand(1);
+  MachineOperand &Src1 = Inst.getOperand(2);
+
+  legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src0, MRI, DL); // Legalize both sources against VGPR_32 before the VALU instructions use them.
+  legalizeGenericOperand(MBB, MII, &AMDGPU::VGPR_32RegClass, Src1, MRI, DL);
+
+  unsigned Xor = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); // Temporary that receives Src0 ^ Src1.
+  BuildMI(MBB, MII, DL, get(AMDGPU::V_XOR_B32_e64), Xor)
+    .add(Src0)
+    .add(Src1);
+
+  unsigned Not = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); // Final result: ~(Src0 ^ Src1).
+  BuildMI(MBB, MII, DL, get(AMDGPU::V_NOT_B32_e64), Not)
+    .addReg(Xor);
+
+  MRI.replaceRegWith(Dest.getReg(), Not); // Every reference to the old destination register now refers to Not.
+  addUsersToMoveToVALUWorklist(Not, MRI, Worklist); // Hand the users of the new result to the worklist for further processing.
+}
+
  void SIInstrInfo::splitScalar64BitUnaryOp(
      SetVectorType &Worklist, MachineInstr &Inst,
      unsigned Opcode) const {
diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h

index 087d4b0a3c9bf1c2dbd1e451a454171c6e7f7ee3..93513e2de159c85da8db13be5e74d05f827cabac 100644 (file)
--- a/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/lib/Target/AMDGPU/SIInstrInfo.h
@@ -78,6 +78,9 @@ private:
    void lowerScalarAbs(SetVectorType &Worklist,
                        MachineInstr &Inst) const;
  
+  void lowerScalarXnor(SetVectorType &Worklist,
+                       MachineInstr &Inst) const;
+
    void splitScalar64BitUnaryOp(SetVectorType &Worklist,
                                 MachineInstr &Inst, unsigned Opcode) const;
  
diff --git a/lib/Target/AMDGPU/SOPInstructions.td b/lib/Target/AMDGPU/SOPInstructions.td

index ec29a66c8bbbe6bfaf9dd12ce625875be911272d..041fec52efe7e5a32b014f6232afe37690fb4710 100644 (file)
--- a/lib/Target/AMDGPU/SOPInstructions.td
+++ b/lib/Target/AMDGPU/SOPInstructions.td
@@ -391,6 +391,14 @@ def S_XOR_B32 : SOP2_32 <"s_xor_b32",
  def S_XOR_B64 : SOP2_64 <"s_xor_b64",
    [(set i64:$sdst, (xor i64:$src0, i64:$src1))]
  >;
+
+def S_XNOR_B32 : SOP2_32 <"s_xnor_b32", // 32-bit scalar XNOR; this definition now carries a selection pattern.
+  [(set i32:$sdst, (not (xor_oneuse i32:$src0, i32:$src1)))] // Selects not(xor a, b); the *_mul_use tests in xnor.ll expect no match when the xor has a second user.
+>;
+
+def S_XNOR_B64 : SOP2_64 <"s_xnor_b64", // 64-bit counterpart of S_XNOR_B32.
+  [(set i64:$sdst, (not (xor_oneuse i64:$src0, i64:$src1)))] // Same not(xor_oneuse) pattern on i64 operands.
+>;
  } // End isCommutable = 1
  
  def S_ANDN2_B32 : SOP2_32 <"s_andn2_b32">;
@@ -401,8 +409,6 @@ def S_NAND_B32 : SOP2_32 <"s_nand_b32">;
  def S_NAND_B64 : SOP2_64 <"s_nand_b64">;
  def S_NOR_B32 : SOP2_32 <"s_nor_b32">;
  def S_NOR_B64 : SOP2_64 <"s_nor_b64">;
-def S_XNOR_B32 : SOP2_32 <"s_xnor_b32">;
-def S_XNOR_B64 : SOP2_64 <"s_xnor_b64">;
  } // End Defs = [SCC]
  
  // Use added complexity so these patterns are preferred to the VALU patterns.
diff --git a/test/CodeGen/AMDGPU/xnor.ll b/test/CodeGen/AMDGPU/xnor.ll

new file mode 100644 (file)

index 0000000..3991e61
--- /dev/null
+++ b/test/CodeGen/AMDGPU/xnor.ll
@@ -0,0 +1,83 @@
+; RUN: llc -march=amdgcn -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX600 %s
+; RUN: llc -march=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX700 %s
+; RUN: llc -march=amdgcn -mcpu=gfx800 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX800 %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX900 %s
+
+; GCN-LABEL: {{^}}scalar_xnor_i32_one_use
+; GCN: s_xnor_b32
+define amdgpu_kernel void @scalar_xnor_i32_one_use( ; Single-use case: the checks above expect one s_xnor_b32.
+    i32 addrspace(1)* %r0, i32 %a, i32 %b) {
+entry:
+  %xor = xor i32 %a, %b
+  %r0.val = xor i32 %xor, -1 ; xor with -1 is a bitwise not; this is the only user of %xor.
+  store i32 %r0.val, i32 addrspace(1)* %r0
+  ret void
+}
+
+; GCN-LABEL: {{^}}scalar_xnor_i32_mul_use
+; GCN-NOT: s_xnor_b32
+; GCN: s_xor_b32
+; GCN: s_not_b32
+; GCN: s_add_i32
+define amdgpu_kernel void @scalar_xnor_i32_mul_use( ; Multi-use case: the checks above expect separate s_xor_b32 / s_not_b32 and no s_xnor_b32.
+    i32 addrspace(1)* %r0, i32 addrspace(1)* %r1, i32 %a, i32 %b) {
+entry:
+  %xor = xor i32 %a, %b
+  %r0.val = xor i32 %xor, -1 ; First user of %xor: the bitwise not.
+  %r1.val = add i32 %xor, %a ; Second user of %xor, so the xor is no longer single-use.
+  store i32 %r0.val, i32 addrspace(1)* %r0
+  store i32 %r1.val, i32 addrspace(1)* %r1
+  ret void
+}
+
+; GCN-LABEL: {{^}}scalar_xnor_i64_one_use
+; GCN: s_xnor_b64
+define amdgpu_kernel void @scalar_xnor_i64_one_use( ; Single-use i64 case: the checks above expect one s_xnor_b64.
+    i64 addrspace(1)* %r0, i64 %a, i64 %b) {
+entry:
+  %xor = xor i64 %a, %b
+  %r0.val = xor i64 %xor, -1 ; xor with -1 is a bitwise not; this is the only user of %xor.
+  store i64 %r0.val, i64 addrspace(1)* %r0
+  ret void
+}
+
+; GCN-LABEL: {{^}}scalar_xnor_i64_mul_use
+; GCN-NOT: s_xnor_b64
+; GCN: s_xor_b64
+; GCN: s_not_b64
+; GCN: s_add_u32
+; GCN: s_addc_u32
+define amdgpu_kernel void @scalar_xnor_i64_mul_use( ; Multi-use i64 case: the checks above expect s_xor_b64 / s_not_b64 and no s_xnor_b64.
+    i64 addrspace(1)* %r0, i64 addrspace(1)* %r1, i64 %a, i64 %b) {
+entry:
+  %xor = xor i64 %a, %b
+  %r0.val = xor i64 %xor, -1 ; First user of %xor: the bitwise not.
+  %r1.val = add i64 %xor, %a ; Second user of %xor; the checks above expect this add as s_add_u32 + s_addc_u32.
+  store i64 %r0.val, i64 addrspace(1)* %r0
+  store i64 %r1.val, i64 addrspace(1)* %r1
+  ret void
+}
+
+; GCN-LABEL: {{^}}vector_xnor_i32_one_use
+; GCN-NOT: s_xnor_b32
+; GCN: v_xor_b32
+; GCN: v_not_b32
+define i32 @vector_xnor_i32_one_use(i32 %a, i32 %b) { ; Non-kernel function: the checks above expect v_xor_b32 + v_not_b32 and no s_xnor_b32.
+entry:
+  %xor = xor i32 %a, %b
+  %r = xor i32 %xor, -1 ; xor with -1 is a bitwise not; this is the only user of %xor.
+  ret i32 %r
+}
+
+; GCN-LABEL: {{^}}vector_xnor_i64_one_use
+; GCN-NOT: s_xnor_b64
+; GCN: v_xor_b32
+; GCN: v_xor_b32
+; GCN: v_not_b32
+; GCN: v_not_b32
+define i64 @vector_xnor_i64_one_use(i64 %a, i64 %b) { ; Non-kernel i64 function: the checks above expect two v_xor_b32 and two v_not_b32, and no s_xnor_b64.
+entry:
+  %xor = xor i64 %a, %b
+  %r = xor i64 %xor, -1 ; xor with -1 is a bitwise not; this is the only user of %xor.
+  ret i64 %r
+}
author	Konstantin Zhuravlyov <kzhuravl_dev@outlook.com>
	Mon, 18 Sep 2017 21:22:45 +0000 (21:22 +0000)
committer	Konstantin Zhuravlyov <kzhuravl_dev@outlook.com>
	Mon, 18 Sep 2017 21:22:45 +0000 (21:22 +0000)
lib/Target/AMDGPU/SIInstrInfo.cpp		patch \| blob \| history
lib/Target/AMDGPU/SIInstrInfo.h		patch \| blob \| history
lib/Target/AMDGPU/SOPInstructions.td		patch \| blob \| history
test/CodeGen/AMDGPU/xnor.ll	[new file with mode: 0644]	patch \| blob