From: Konstantin Zhuravlyov
Date: Mon, 18 Sep 2017 21:22:45 +0000 (+0000)
Subject: AMDGPU: Start selecting s_xnor_{b32, b64}
X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=fe0a82a17cb644fc2f98f264a53b0bdcbe1489d3;p=llvm

AMDGPU: Start selecting s_xnor_{b32, b64}

Differential Revision: https://reviews.llvm.org/D37981

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@313565 91177308-0d34-0410-b5e6-96231b3b80d8
---
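Note on the new TableGen patterns: S_XNOR_B32/S_XNOR_B64 match
(not (xor_oneuse ...)) rather than a plain (not (xor ...)), so an s_xnor is only
formed when the xor result has no other users; the scalar_xnor_*_mul_use tests
below cover the multi-use case. The xor_oneuse fragment is defined elsewhere in
the AMDGPU backend and is not part of this patch. The sketch below only
illustrates the usual one-use PatFrag idiom it relies on; the class name here is
assumed for illustration, not taken from this change.

// Illustrative sketch (not part of this patch): a PatFrag that matches an
// xor only when the resulting node has a single use.
class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{ return N->hasOneUse(); }]
>;

def xor_oneuse : HasOneUseBinOp<xor>;

The one-use restriction keeps an xor that still has other users as a separate
instruction instead of also folding a copy of it into an s_xnor.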
<"s_nor_b32">; def S_NOR_B64 : SOP2_64 <"s_nor_b64">; -def S_XNOR_B32 : SOP2_32 <"s_xnor_b32">; -def S_XNOR_B64 : SOP2_64 <"s_xnor_b64">; } // End Defs = [SCC] // Use added complexity so these patterns are preferred to the VALU patterns. diff --git a/test/CodeGen/AMDGPU/xnor.ll b/test/CodeGen/AMDGPU/xnor.ll new file mode 100644 index 00000000000..3991e615599 --- /dev/null +++ b/test/CodeGen/AMDGPU/xnor.ll @@ -0,0 +1,83 @@ +; RUN: llc -march=amdgcn -mcpu=gfx600 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX600 %s +; RUN: llc -march=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX700 %s +; RUN: llc -march=amdgcn -mcpu=gfx800 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX800 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefix=GCN --check-prefix=GFX900 %s + +; GCN-LABEL: {{^}}scalar_xnor_i32_one_use +; GCN: s_xnor_b32 +define amdgpu_kernel void @scalar_xnor_i32_one_use( + i32 addrspace(1)* %r0, i32 %a, i32 %b) { +entry: + %xor = xor i32 %a, %b + %r0.val = xor i32 %xor, -1 + store i32 %r0.val, i32 addrspace(1)* %r0 + ret void +} + +; GCN-LABEL: {{^}}scalar_xnor_i32_mul_use +; GCN-NOT: s_xnor_b32 +; GCN: s_xor_b32 +; GCN: s_not_b32 +; GCN: s_add_i32 +define amdgpu_kernel void @scalar_xnor_i32_mul_use( + i32 addrspace(1)* %r0, i32 addrspace(1)* %r1, i32 %a, i32 %b) { +entry: + %xor = xor i32 %a, %b + %r0.val = xor i32 %xor, -1 + %r1.val = add i32 %xor, %a + store i32 %r0.val, i32 addrspace(1)* %r0 + store i32 %r1.val, i32 addrspace(1)* %r1 + ret void +} + +; GCN-LABEL: {{^}}scalar_xnor_i64_one_use +; GCN: s_xnor_b64 +define amdgpu_kernel void @scalar_xnor_i64_one_use( + i64 addrspace(1)* %r0, i64 %a, i64 %b) { +entry: + %xor = xor i64 %a, %b + %r0.val = xor i64 %xor, -1 + store i64 %r0.val, i64 addrspace(1)* %r0 + ret void +} + +; GCN-LABEL: {{^}}scalar_xnor_i64_mul_use +; GCN-NOT: s_xnor_b64 +; GCN: s_xor_b64 +; GCN: s_not_b64 +; GCN: s_add_u32 +; GCN: s_addc_u32 +define amdgpu_kernel void @scalar_xnor_i64_mul_use( + i64 addrspace(1)* %r0, i64 addrspace(1)* %r1, i64 %a, i64 %b) { +entry: + %xor = xor i64 %a, %b + %r0.val = xor i64 %xor, -1 + %r1.val = add i64 %xor, %a + store i64 %r0.val, i64 addrspace(1)* %r0 + store i64 %r1.val, i64 addrspace(1)* %r1 + ret void +} + +; GCN-LABEL: {{^}}vector_xnor_i32_one_use +; GCN-NOT: s_xnor_b32 +; GCN: v_xor_b32 +; GCN: v_not_b32 +define i32 @vector_xnor_i32_one_use(i32 %a, i32 %b) { +entry: + %xor = xor i32 %a, %b + %r = xor i32 %xor, -1 + ret i32 %r +} + +; GCN-LABEL: {{^}}vector_xnor_i64_one_use +; GCN-NOT: s_xnor_b64 +; GCN: v_xor_b32 +; GCN: v_xor_b32 +; GCN: v_not_b32 +; GCN: v_not_b32 +define i64 @vector_xnor_i64_one_use(i64 %a, i64 %b) { +entry: + %xor = xor i64 %a, %b + %r = xor i64 %xor, -1 + ret i64 %r +}