From 3a74bac0211c9864c02673008d91e1430da5cdcc Mon Sep 17 00:00:00 2001
From: Matt Arsenault
Date: Fri, 16 Sep 2016 21:41:16 +0000
Subject: [PATCH] AMDGPU: Use SOPK compare instructions

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@281780 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/AMDGPU/SIDefines.h              |   3 +-
 lib/Target/AMDGPU/SIInstrFormats.td        |   5 +
 lib/Target/AMDGPU/SIInstrInfo.cpp          |  15 +
 lib/Target/AMDGPU/SIInstrInfo.h            |  11 +
 lib/Target/AMDGPU/SIInstrInfo.td           |  45 +-
 lib/Target/AMDGPU/SIShrinkInstructions.cpp |  64 +++
 lib/Target/AMDGPU/SOPInstructions.td       |  60 ++-
 test/CodeGen/AMDGPU/si-annotate-cf.ll      |   2 +-
 test/CodeGen/AMDGPU/sopk-compares.ll       | 573 +++++++++++++++++++++
 9 files changed, 726 insertions(+), 52 deletions(-)
 create mode 100644 test/CodeGen/AMDGPU/sopk-compares.ll

diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h
index 5c29f82fa23..643b8722d91 100644
--- a/lib/Target/AMDGPU/SIDefines.h
+++ b/lib/Target/AMDGPU/SIDefines.h
@@ -43,7 +43,8 @@ enum {
   SGPRSpill = 1 << 24,
   VOPAsmPrefer32Bit = 1 << 25,
   Gather4 = 1 << 26,
-  DisableWQM = 1 << 27
+  DisableWQM = 1 << 27,
+  SOPK_ZEXT = 1 << 28
 };
 }

diff --git a/lib/Target/AMDGPU/SIInstrFormats.td b/lib/Target/AMDGPU/SIInstrFormats.td
index aa15e09085a..33b9b122e2e 100644
--- a/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/lib/Target/AMDGPU/SIInstrFormats.td
@@ -56,6 +56,10 @@ class InstSI <dag outs, dag ins, string asm = "",

   field bits<1> DisableWQM = 0;

+  // Most SOPK instructions treat the immediate as a signed 16-bit value;
+  // however, some use it as unsigned.
+  field bits<1> SOPKZext = 0;
+
   // These need to be kept in sync with the enum in SIInstrFlags.
   let TSFlags{0} = VM_CNT;
   let TSFlags{1} = EXP_CNT;
@@ -89,6 +93,7 @@ class InstSI <dag outs, dag ins, string asm = "",
   let TSFlags{25} = VOPAsmPrefer32Bit;
   let TSFlags{26} = Gather4;
   let TSFlags{27} = DisableWQM;
+  let TSFlags{28} = SOPKZext;

   let SchedRW = [Write32Bit];

diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1617,6 +1617,21 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
     }
   }

+  if (isSOPK(MI)) {
+    int64_t Imm = getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
+    if (sopkIsZext(MI)) {
+      if (!isUInt<16>(Imm)) {
+        ErrInfo = "invalid immediate for SOPK instruction";
+        return false;
+      }
+    } else {
+      if (!isInt<16>(Imm)) {
+        ErrInfo = "invalid immediate for SOPK instruction";
+        return false;
+      }
+    }
+  }
+
   if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
       Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
       Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h
index aeeee7d57e1..11fe8c90396 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/lib/Target/AMDGPU/SIInstrInfo.h
@@ -391,6 +391,14 @@ public:
     return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
   }

+  static bool sopkIsZext(const MachineInstr &MI) {
+    return MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT;
+  }
+
+  bool sopkIsZext(uint16_t Opcode) const {
+    return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT;
+  }
+
   bool isVGPRCopy(const MachineInstr &MI) const {
     assert(MI.isCopy());
     unsigned Dest = MI.getOperand(0).getReg();
@@ -603,6 +611,9 @@ namespace AMDGPU {
   LLVM_READONLY
   int getAtomicNoRetOp(uint16_t Opcode);

+  LLVM_READONLY
+  int getSOPKOp(uint16_t Opcode);
+
   const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
   const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
   const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td
index 27b53c9e151..37f8f17bff3 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1193,7 +1193,7 @@ class VOP <string opName> {
   string OpName = opName;
 }

-class VOP2_REV <string revOp, bit isOrig> {
+class Commutable_REV <string revOp, bit isOrig> {
   string RevOp = revOp;
   bit IsOrig = isOrig;
 }
@@ -1325,7 +1325,7 @@ multiclass VOP2SI_m <vop2 op, string opName, dag outs, dag ins, string asm,
                      list<dag> pattern, string revOp> {

   def "" : VOP2_Pseudo <outs, ins, pattern, opName>,
-           VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
+           Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;

   def _si : VOP2_Real_si <op, opName, outs, ins, asm>;
 }
@@ -1334,7 +1334,7 @@ multiclass VOP2_m <vop2 op, string opName, dag outs, dag ins, string asm,
                    list<dag> pattern, string revOp> {
def "" : VOP2_Pseudo , - VOP2_REV; + Commutable_REV; def _si : VOP2_Real_si ; @@ -1523,7 +1523,7 @@ multiclass VOP3_2_m { def "" : VOP3_Pseudo , - VOP2_REV; + Commutable_REV; def _si : VOP3_Real_si , VOP3DisableFields<1, 0, HasMods>; @@ -1537,7 +1537,7 @@ multiclass VOP3SI_2_m { def "" : VOP3_Pseudo , - VOP2_REV; + Commutable_REV; def _si : VOP3_Real_si , VOP3DisableFields<1, 0, HasMods>; @@ -1578,7 +1578,7 @@ multiclass VOP3_C_m sched> { def "" : VOP3_Pseudo , - VOP2_REV { + Commutable_REV { let Defs = !if(defExec, [EXEC], []); let SchedRW = sched; } @@ -1829,7 +1829,7 @@ multiclass VOPC_m pattern, string revOpName = "", string asm = opName#"_e32 "#op_asm, string alias_asm = opName#" "#op_asm> { def "" : VOPC_Pseudo , - VOP2_REV { + Commutable_REV { let Defs = !if(DefExec, [VCC, EXEC], [VCC]); let SchedRW = sched; let isConvergent = DefExec; @@ -2106,7 +2106,7 @@ def getMaskedMIMGOp : InstrMapping { // Maps an commuted opcode to its original version def getCommuteOrig : InstrMapping { - let FilterClass = "VOP2_REV"; + let FilterClass = "Commutable_REV"; let RowFields = ["RevOp"]; let ColFields = ["IsOrig"]; let KeyCol = ["0"]; @@ -2115,31 +2115,13 @@ def getCommuteOrig : InstrMapping { // Maps an original opcode to its commuted version def getCommuteRev : InstrMapping { - let FilterClass = "VOP2_REV"; + let FilterClass = "Commutable_REV"; let RowFields = ["RevOp"]; let ColFields = ["IsOrig"]; let KeyCol = ["1"]; let ValueCols = [["0"]]; } -def getCommuteCmpOrig : InstrMapping { - let FilterClass = "VOP2_REV"; - let RowFields = ["RevOp"]; - let ColFields = ["IsOrig"]; - let KeyCol = ["0"]; - let ValueCols = [["1"]]; -} - -// Maps an original opcode to its commuted version -def getCommuteCmpRev : InstrMapping { - let FilterClass = "VOP2_REV"; - let RowFields = ["RevOp"]; - let ColFields = ["IsOrig"]; - let KeyCol = ["1"]; - let ValueCols = [["0"]]; -} - - def getMCOpcodeGen : InstrMapping { let FilterClass = "SIMCInstr"; let RowFields = ["PseudoInstr"]; @@ -2149,6 +2131,15 @@ def getMCOpcodeGen : InstrMapping { [!cast(SIEncodingFamily.VI)]]; } +// Get equivalent SOPK instruction. +def getSOPKOp : InstrMapping { + let FilterClass = "SOPKInstTable"; + let RowFields = ["BaseCmpOp"]; + let ColFields = ["IsSOPK"]; + let KeyCol = ["0"]; + let ValueCols = [["1"]]; +} + def getAddr64Inst : InstrMapping { let FilterClass = "MUBUFAddr64Table"; let RowFields = ["OpName"]; diff --git a/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/lib/Target/AMDGPU/SIShrinkInstructions.cpp index e72b7d496ab..1a0f7d41a1f 100644 --- a/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -188,6 +188,26 @@ static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) { return isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4); } +static bool isKUImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) { + return isUInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4); +} + +static bool isKImmOrKUImmOperand(const SIInstrInfo *TII, + const MachineOperand &Src, + bool &IsUnsigned) { + if (isInt<16>(Src.getImm())) { + IsUnsigned = false; + return !TII->isInlineConstant(Src, 4); + } + + if (isUInt<16>(Src.getImm())) { + IsUnsigned = true; + return !TII->isInlineConstant(Src, 4); + } + + return false; +} + /// Copy implicit register operands from specified instruction to this /// instruction that are not part of the instruction definition. 
 static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF,
@@ -202,6 +222,44 @@ static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF,
   }
 }

+static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
+  // cmpk instructions do scc = dst <cc> imm16, so commute the instruction to
+  // get constants on the RHS.
+  if (!MI.getOperand(0).isReg())
+    TII->commuteInstruction(MI, false, 0, 1);
+
+  const MachineOperand &Src1 = MI.getOperand(1);
+  if (!Src1.isImm())
+    return;
+
+  int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
+  if (SOPKOpc == -1)
+    return;
+
+  // eq/ne is special because the imm16 can be treated as signed or unsigned,
+  // and is initially selected to the signed versions.
+  if (SOPKOpc == AMDGPU::S_CMPK_EQ_I32 || SOPKOpc == AMDGPU::S_CMPK_LG_I32) {
+    bool HasUImm;
+    if (isKImmOrKUImmOperand(TII, Src1, HasUImm)) {
+      if (HasUImm) {
+        SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_I32) ?
+          AMDGPU::S_CMPK_EQ_U32 : AMDGPU::S_CMPK_LG_U32;
+      }
+
+      MI.setDesc(TII->get(SOPKOpc));
+    }
+
+    return;
+  }
+
+  const MCInstrDesc &NewDesc = TII->get(SOPKOpc);
+
+  if ((TII->sopkIsZext(SOPKOpc) && isKUImmOperand(TII, Src1)) ||
+      (!TII->sopkIsZext(SOPKOpc) && isKImmOperand(TII, Src1))) {
+    MI.setDesc(NewDesc);
+  }
+}
+
 bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(*MF.getFunction()))
     return false;
@@ -310,6 +368,12 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
       }
     }

+    // Try to use s_cmpk_*
+    if (MI.isCompare() && TII->isSOPC(MI)) {
+      shrinkScalarCompare(TII, MI);
+      continue;
+    }
+
     // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
     if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
       const MachineOperand &Src = MI.getOperand(1);
diff --git a/lib/Target/AMDGPU/SOPInstructions.td b/lib/Target/AMDGPU/SOPInstructions.td
index 7226f2002c4..4451d1642f5 100644
--- a/lib/Target/AMDGPU/SOPInstructions.td
+++ b/lib/Target/AMDGPU/SOPInstructions.td
@@ -473,6 +473,11 @@ class SOPK_Real64 <bits<5> op, SOPK_Pseudo ps> :
   let Inst{63-32} = imm;
 }

+class SOPKInstTable <bit is_sopk, string cmpOp = ""> {
+  bit IsSOPK = is_sopk;
+  string BaseCmpOp = cmpOp;
+}
+
 class SOPK_32 <string opName, list<dag> pattern=[]> : SOPK_Pseudo <
   opName,
   (outs SReg_32:$sdst),
@@ -480,12 +485,12 @@ class SOPK_32 <string opName, list<dag> pattern=[]> : SOPK_Pseudo <
   "$sdst, $simm16",
   pattern>;

-class SOPK_SCC <string opName, list<dag> pattern=[]> : SOPK_Pseudo <
+class SOPK_SCC <string opName, string base_op = ""> : SOPK_Pseudo <
   opName,
   (outs),
   (ins SReg_32:$sdst, u16imm:$simm16),
-  "$sdst, $simm16",
-  pattern> {
+  "$sdst, $simm16", []>,
+  SOPKInstTable<1, base_op> {
   let Defs = [SCC];
 }

@@ -521,18 +526,21 @@ let isCompare = 1 in {
 //   [(set i1:$dst, (setcc i32:$src0, imm:$src1, SETEQ))]
 // >;
-def S_CMPK_EQ_I32 : SOPK_SCC <"s_cmpk_eq_i32">;
-def S_CMPK_LG_I32 : SOPK_SCC <"s_cmpk_lg_i32">;
-def S_CMPK_GT_I32 : SOPK_SCC <"s_cmpk_gt_i32">;
-def S_CMPK_GE_I32 : SOPK_SCC <"s_cmpk_ge_i32">;
-def S_CMPK_LT_I32 : SOPK_SCC <"s_cmpk_lt_i32">;
-def S_CMPK_LE_I32 : SOPK_SCC <"s_cmpk_le_i32">;
-def S_CMPK_EQ_U32 : SOPK_SCC <"s_cmpk_eq_u32">;
-def S_CMPK_LG_U32 : SOPK_SCC <"s_cmpk_lg_u32">;
-def S_CMPK_GT_U32 : SOPK_SCC <"s_cmpk_gt_u32">;
-def S_CMPK_GE_U32 : SOPK_SCC <"s_cmpk_ge_u32">;
-def S_CMPK_LT_U32 : SOPK_SCC <"s_cmpk_lt_u32">;
-def S_CMPK_LE_U32 : SOPK_SCC <"s_cmpk_le_u32">;
+def S_CMPK_EQ_I32 : SOPK_SCC <"s_cmpk_eq_i32", "s_cmp_eq_i32">;
+def S_CMPK_LG_I32 : SOPK_SCC <"s_cmpk_lg_i32", "s_cmp_lg_i32">;
+def S_CMPK_GT_I32 : SOPK_SCC <"s_cmpk_gt_i32", "s_cmp_gt_i32">;
+def S_CMPK_GE_I32 : SOPK_SCC <"s_cmpk_ge_i32", "s_cmp_ge_i32">;
+def S_CMPK_LT_I32 : SOPK_SCC <"s_cmpk_lt_i32", "s_cmp_lt_i32">;
"s_cmp_lt_i32">; +def S_CMPK_LE_I32 : SOPK_SCC <"s_cmpk_le_i32", "s_cmp_le_i32">; + +let SOPKZext = 1 in { +def S_CMPK_EQ_U32 : SOPK_SCC <"s_cmpk_eq_u32", "s_cmp_eq_u32">; +def S_CMPK_LG_U32 : SOPK_SCC <"s_cmpk_lg_u32", "s_cmp_lg_u32">; +def S_CMPK_GT_U32 : SOPK_SCC <"s_cmpk_gt_u32", "s_cmp_gt_u32">; +def S_CMPK_GE_U32 : SOPK_SCC <"s_cmpk_ge_u32", "s_cmp_ge_u32">; +def S_CMPK_LT_U32 : SOPK_SCC <"s_cmpk_lt_u32", "s_cmp_lt_u32">; +def S_CMPK_LE_U32 : SOPK_SCC <"s_cmpk_le_u32", "s_cmp_le_u32">; +} // End SOPKZext = 1 } // End isCompare = 1 let Defs = [SCC], isCommutable = 1, DisableEncoding = "$src0", @@ -613,8 +621,14 @@ class SOPC_Helper op, RegisterOperand rc, ValueType vt, [(set SCC, (si_setcc_uniform vt:$src0, vt:$src1, cond))] > { } -class SOPC_CMP_32 op, string opName, PatLeaf cond = COND_NULL> - : SOPC_Helper; +class SOPC_CMP_32 op, string opName, + PatLeaf cond = COND_NULL, string revOp = opName> + : SOPC_Helper, + Commutable_REV, + SOPKInstTable<0, opName> { + let isCompare = 1; + let isCommutable = 1; +} class SOPC_32 op, string opName, list pattern = []> : SOPC_Base; @@ -622,19 +636,19 @@ class SOPC_32 op, string opName, list pattern = []> class SOPC_64_32 op, string opName, list pattern = []> : SOPC_Base; - def S_CMP_EQ_I32 : SOPC_CMP_32 <0x00, "s_cmp_eq_i32", COND_EQ>; def S_CMP_LG_I32 : SOPC_CMP_32 <0x01, "s_cmp_lg_i32", COND_NE>; def S_CMP_GT_I32 : SOPC_CMP_32 <0x02, "s_cmp_gt_i32", COND_SGT>; def S_CMP_GE_I32 : SOPC_CMP_32 <0x03, "s_cmp_ge_i32", COND_SGE>; -def S_CMP_LT_I32 : SOPC_CMP_32 <0x04, "s_cmp_lt_i32", COND_SLT>; -def S_CMP_LE_I32 : SOPC_CMP_32 <0x05, "s_cmp_le_i32", COND_SLE>; +def S_CMP_LT_I32 : SOPC_CMP_32 <0x04, "s_cmp_lt_i32", COND_SLT, "s_cmp_gt_i32">; +def S_CMP_LE_I32 : SOPC_CMP_32 <0x05, "s_cmp_le_i32", COND_SLE, "s_cmp_ge_i32">; def S_CMP_EQ_U32 : SOPC_CMP_32 <0x06, "s_cmp_eq_u32", COND_EQ>; -def S_CMP_LG_U32 : SOPC_CMP_32 <0x07, "s_cmp_lg_u32", COND_NE >; +def S_CMP_LG_U32 : SOPC_CMP_32 <0x07, "s_cmp_lg_u32", COND_NE>; def S_CMP_GT_U32 : SOPC_CMP_32 <0x08, "s_cmp_gt_u32", COND_UGT>; def S_CMP_GE_U32 : SOPC_CMP_32 <0x09, "s_cmp_ge_u32", COND_UGE>; -def S_CMP_LT_U32 : SOPC_CMP_32 <0x0a, "s_cmp_lt_u32", COND_ULT>; -def S_CMP_LE_U32 : SOPC_CMP_32 <0x0b, "s_cmp_le_u32", COND_ULE>; +def S_CMP_LT_U32 : SOPC_CMP_32 <0x0a, "s_cmp_lt_u32", COND_ULT, "s_cmp_gt_u32">; +def S_CMP_LE_U32 : SOPC_CMP_32 <0x0b, "s_cmp_le_u32", COND_ULE, "s_cmp_ge_u32">; + def S_BITCMP0_B32 : SOPC_32 <0x0c, "s_bitcmp0_b32">; def S_BITCMP1_B32 : SOPC_32 <0x0d, "s_bitcmp1_b32">; def S_BITCMP0_B64 : SOPC_64_32 <0x0e, "s_bitcmp0_b64">; diff --git a/test/CodeGen/AMDGPU/si-annotate-cf.ll b/test/CodeGen/AMDGPU/si-annotate-cf.ll index 133fd480e59..1a4c2259559 100644 --- a/test/CodeGen/AMDGPU/si-annotate-cf.ll +++ b/test/CodeGen/AMDGPU/si-annotate-cf.ll @@ -92,7 +92,7 @@ declare float @llvm.fabs.f32(float) nounwind readnone ; SI: s_cmp_gt_i32 ; SI-NEXT: s_cbranch_scc0 [[ENDPGM:BB[0-9]+_[0-9]+]] -; SI: s_cmp_gt_i32 +; SI: s_cmpk_gt_i32 ; SI-NEXT: s_cbranch_scc1 [[ENDPGM]] ; SI: [[INFLOOP:BB[0-9]+_[0-9]+]] diff --git a/test/CodeGen/AMDGPU/sopk-compares.ll b/test/CodeGen/AMDGPU/sopk-compares.ll new file mode 100644 index 00000000000..570ffb281da --- /dev/null +++ b/test/CodeGen/AMDGPU/sopk-compares.ll @@ -0,0 +1,573 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s + +; Since this intrinsic is exposed as a constant after isel, use it 
to +; defeat the DAG's compare with constant canonicalizations. +declare i32 @llvm.amdgcn.groupstaticsize() #1 + +@lds = addrspace(3) global [512 x i32] undef, align 4 + +; GCN-LABEL: {{^}}br_scc_eq_i32_inline_imm: +; GCN: s_cmp_eq_i32 s{{[0-9]+}}, 4{{$}} +define void @br_scc_eq_i32_inline_imm(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %cmp0 = icmp eq i32 %cond, 4 + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "", ""() + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_max: +; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x7fff{{$}} +define void @br_scc_eq_i32_simm16_max(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %cmp0 = icmp eq i32 %cond, 32767 + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "", ""() + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_max_p1: +; GCN: s_cmpk_eq_u32 s{{[0-9]+}}, 0x8000{{$}} +define void @br_scc_eq_i32_simm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %cmp0 = icmp eq i32 %cond, 32768 + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "", ""() + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}br_scc_ne_i32_simm16_max_p1: +; GCN: s_cmpk_lg_u32 s{{[0-9]+}}, 0x8000{{$}} +define void @br_scc_ne_i32_simm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %cmp0 = icmp ne i32 %cond, 32768 + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "", ""() + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_min: +; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x8000{{$}} +define void @br_scc_eq_i32_simm16_min(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %cmp0 = icmp eq i32 %cond, -32768 + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "", ""() + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_min_m1: +; GCN: s_cmp_eq_i32 s{{[0-9]+}}, 0xffff7fff{{$}} +define void @br_scc_eq_i32_simm16_min_m1(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %cmp0 = icmp eq i32 %cond, -32769 + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "", ""() + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}br_scc_eq_i32_uimm15_max: +; GCN: s_cmpk_eq_u32 s{{[0-9]+}}, 0xffff{{$}} +define void @br_scc_eq_i32_uimm15_max(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %cmp0 = icmp eq i32 %cond, 65535 + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "", ""() + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}br_scc_eq_i32_uimm16_max: +; GCN: s_cmpk_eq_u32 s{{[0-9]+}}, 0xffff{{$}} +define void @br_scc_eq_i32_uimm16_max(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %cmp0 = icmp eq i32 %cond, 65535 + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "", ""() + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}br_scc_eq_i32_uimm16_max_p1: +; GCN: s_cmp_eq_i32 s{{[0-9]+}}, 0x10000{{$}} +define void @br_scc_eq_i32_uimm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %cmp0 = icmp eq i32 %cond, 65536 + br i1 %cmp0, label %endif, label %if + +if: + call 
void asm sideeffect "", ""() + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + + +; GCN-LABEL: {{^}}br_scc_eq_i32: +; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x41{{$}} +define void @br_scc_eq_i32(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %cmp0 = icmp eq i32 %cond, 65 + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "", ""() + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}br_scc_ne_i32: +; GCN: s_cmpk_lg_i32 s{{[0-9]+}}, 0x41{{$}} +define void @br_scc_ne_i32(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %cmp0 = icmp ne i32 %cond, 65 + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "", ""() + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}br_scc_sgt_i32: +; GCN: s_cmpk_gt_i32 s{{[0-9]+}}, 0x41{{$}} +define void @br_scc_sgt_i32(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %cmp0 = icmp sgt i32 %cond, 65 + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "", ""() + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}br_scc_sgt_i32_simm16_max: +; GCN: s_cmpk_gt_i32 s{{[0-9]+}}, 0x7fff{{$}} +define void @br_scc_sgt_i32_simm16_max(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %cmp0 = icmp sgt i32 %cond, 32767 + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "", ""() + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}br_scc_sgt_i32_simm16_max_p1: +; GCN: s_cmp_gt_i32 s{{[0-9]+}}, 0x8000{{$}} +define void @br_scc_sgt_i32_simm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %cmp0 = icmp sgt i32 %cond, 32768 + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "", ""() + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}br_scc_sge_i32: +; GCN: s_cmpk_ge_i32 s{{[0-9]+}}, 0x800{{$}} +define void @br_scc_sge_i32(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %size = call i32 @llvm.amdgcn.groupstaticsize() + %cmp0 = icmp sge i32 %cond, %size + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds) + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}br_scc_slt_i32: +; GCN: s_cmpk_lt_i32 s{{[0-9]+}}, 0x41{{$}} +define void @br_scc_slt_i32(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %cmp0 = icmp slt i32 %cond, 65 + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "", ""() + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}br_scc_sle_i32: +; GCN: s_cmpk_le_i32 s{{[0-9]+}}, 0x800{{$}} +define void @br_scc_sle_i32(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %size = call i32 @llvm.amdgcn.groupstaticsize() + %cmp0 = icmp sle i32 %cond, %size + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds) + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}br_scc_ugt_i32: +; GCN: s_cmpk_gt_u32 s{{[0-9]+}}, 0x800{{$}} +define void @br_scc_ugt_i32(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %size = call i32 @llvm.amdgcn.groupstaticsize() + %cmp0 = icmp ugt i32 %cond, %size + br i1 %cmp0, label %endif, label %if + +if: + call 
void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds) + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}br_scc_uge_i32: +; GCN: s_cmpk_ge_u32 s{{[0-9]+}}, 0x800{{$}} +define void @br_scc_uge_i32(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %size = call i32 @llvm.amdgcn.groupstaticsize() + %cmp0 = icmp uge i32 %cond, %size + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds) + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}br_scc_ult_i32: +; GCN: s_cmpk_lt_u32 s{{[0-9]+}}, 0x41{{$}} +define void @br_scc_ult_i32(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %cmp0 = icmp ult i32 %cond, 65 + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "", ""() + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}br_scc_ult_i32_min_simm16: +; GCN: s_cmp_lt_u32 s2, 0xffff8000 +define void @br_scc_ult_i32_min_simm16(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %cmp0 = icmp ult i32 %cond, -32768 + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "", ""() + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}br_scc_ult_i32_min_simm16_m1: +; GCN: s_cmp_lt_u32 s{{[0-9]+}}, 0xffff7fff{{$}} +define void @br_scc_ult_i32_min_simm16_m1(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %cmp0 = icmp ult i32 %cond, -32769 + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "", ""() + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}br_scc_ule_i32: +; GCN: s_cmpk_le_u32 s{{[0-9]+}}, 0x800{{$}} +define void @br_scc_ule_i32(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %size = call i32 @llvm.amdgcn.groupstaticsize() + %cmp0 = icmp ule i32 %cond, %size + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds) + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}commute_br_scc_eq_i32: +; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x800{{$}} +define void @commute_br_scc_eq_i32(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %size = call i32 @llvm.amdgcn.groupstaticsize() + %cmp0 = icmp eq i32 %size, %cond + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds) + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}commute_br_scc_ne_i32: +; GCN: s_cmpk_lg_i32 s{{[0-9]+}}, 0x800{{$}} +define void @commute_br_scc_ne_i32(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %size = call i32 @llvm.amdgcn.groupstaticsize() + %cmp0 = icmp ne i32 %size, %cond + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds) + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}commute_br_scc_sgt_i32: +; GCN: s_cmpk_lt_i32 s{{[0-9]+}}, 0x800{{$}} +define void @commute_br_scc_sgt_i32(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %size = call i32 @llvm.amdgcn.groupstaticsize() + %cmp0 = icmp sgt i32 %size, %cond + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds) + br label %endif + +endif: + store volatile 
i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}commute_br_scc_sge_i32: +; GCN: s_cmpk_le_i32 s{{[0-9]+}}, 0x800{{$}} +define void @commute_br_scc_sge_i32(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %size = call i32 @llvm.amdgcn.groupstaticsize() + %cmp0 = icmp sge i32 %size, %cond + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds) + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}commute_br_scc_slt_i32: +; GCN: s_cmpk_gt_i32 s{{[0-9]+}}, 0x800{{$}} +define void @commute_br_scc_slt_i32(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %size = call i32 @llvm.amdgcn.groupstaticsize() + %cmp0 = icmp slt i32 %size, %cond + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds) + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}commute_br_scc_sle_i32: +; GCN: s_cmpk_ge_i32 s{{[0-9]+}}, 0x800{{$}} +define void @commute_br_scc_sle_i32(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %size = call i32 @llvm.amdgcn.groupstaticsize() + %cmp0 = icmp sle i32 %size, %cond + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds) + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}commute_br_scc_ugt_i32: +; GCN: s_cmpk_lt_u32 s{{[0-9]+}}, 0x800{{$}} +define void @commute_br_scc_ugt_i32(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %size = call i32 @llvm.amdgcn.groupstaticsize() + %cmp0 = icmp ugt i32 %size, %cond + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds) + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}commute_br_scc_uge_i32: +; GCN: s_cmpk_le_u32 s{{[0-9]+}}, 0x800{{$}} +define void @commute_br_scc_uge_i32(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %size = call i32 @llvm.amdgcn.groupstaticsize() + %cmp0 = icmp uge i32 %size, %cond + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds) + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}commute_br_scc_ult_i32: +; GCN: s_cmpk_gt_u32 s{{[0-9]+}}, 0x800{{$}} +define void @commute_br_scc_ult_i32(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %size = call i32 @llvm.amdgcn.groupstaticsize() + %cmp0 = icmp ult i32 %size, %cond + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds) + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}commute_br_scc_ule_i32: +; GCN: s_cmpk_ge_u32 s{{[0-9]+}}, 0x800{{$}} +define void @commute_br_scc_ule_i32(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %size = call i32 @llvm.amdgcn.groupstaticsize() + %cmp0 = icmp ule i32 %size, %cond + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds) + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +; GCN-LABEL: {{^}}br_scc_ult_i32_non_u16: +; GCN: s_cmp_lt_u32 s2, 0xfffff7ff +define void @br_scc_ult_i32_non_u16(i32 %cond, i32 addrspace(1)* %out) #0 { +entry: + %size = call i32 @llvm.amdgcn.groupstaticsize() + %not.size = xor 
i32 %size, -1 + %cmp0 = icmp ult i32 %cond, %not.size + br i1 %cmp0, label %endif, label %if + +if: + call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds) + br label %endif + +endif: + store volatile i32 1, i32 addrspace(1)* %out + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } -- 2.50.1
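
A quick illustration of the intended effect (a sketch, not output copied from
llc): an SOPC compare against a constant outside the inline-constant range
[-16, 64] needs a 32-bit literal dword after the instruction, while the SOPK
form folds the constant into its 16-bit simm16 field:

  ; before: 8 bytes (4-byte SOPC encoding + 4-byte literal)
  s_cmp_eq_i32 s0, 0x41
  ; after: 4 bytes (SOPK encoding with 0x41 in simm16)
  s_cmpk_eq_i32 s0, 0x41

Signed immediates in [-32768, 32767] map onto the s_cmpk_*_i32 forms; eq/ne
compares with immediates in [32768, 65535] are instead rewritten to the
zero-extended s_cmpk_eq_u32/s_cmpk_lg_u32 forms, which is why the eq/ne case
is handled separately in shrinkScalarCompare above.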