foreach Ty = [i64, p0, p1, p4] in {
defm : SMRD_Pattern <"S_LOAD_DWORDX2", Ty>;
}
+
+def gi_as_i32timm : GICustomOperandRenderer<"renderTruncImm32">,
+ GISDNodeXFormEquiv<as_i32timm>;
[=](MachineInstrBuilder &MIB) { MIB.addImm(0); }
}};
}
+
+void AMDGPUInstructionSelector::renderTruncImm32(MachineInstrBuilder &MIB,
+ const MachineInstr &MI) const {
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ assert(MI.getOpcode() == TargetOpcode::G_CONSTANT && "Expected G_CONSTANT");
+ Optional<int64_t> CstVal = getConstantVRegVal(MI.getOperand(0).getReg(), MRI);
+ assert(CstVal && "Expected constant value");
+ MIB.addImm(CstVal.getValue());
+}
InstructionSelector::ComplexRendererFns
selectDS1Addr1Offset(MachineOperand &Root) const;
+ void renderTruncImm32(MachineInstrBuilder &MIB,
+ const MachineInstr &MI) const;
+
const SIInstrInfo &TII;
const SIRegisterInfo &TRI;
const AMDGPURegisterBankInfo &RBI;
OpdsMapping[2] = AMDGPU::getValueMapping(Bank, 32);
break;
}
- case Intrinsic::amdgcn_end_cf: {
+ case Intrinsic::amdgcn_end_cf:
+ case Intrinsic::amdgcn_init_exec: {
unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
break;
OpdsMapping[5] = getSGPROpMapping(MI.getOperand(5).getReg(), MRI, *TRI);
break;
}
+ case Intrinsic::amdgcn_init_exec_from_input: {
+ unsigned Size = getSizeInBits(MI.getOperand(1).getReg(), MRI, *TRI);
+ OpdsMapping[1] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size);
+ break;
+ }
default:
if (const AMDGPU::RsrcIntrinsic *RSrcIntrin =
AMDGPU::lookupRsrcIntrinsic(IntrID)) {
return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
}]>;
+def as_i32timm: SDNodeXForm<timm, [{
+ return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i32);
+}]>;
+
def as_i64imm: SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getSExtValue(), SDLoc(N), MVT::i64);
}]>;
[{return isUInt<16>(Imm);}]
>;
+def i64imm_32bit : ImmLeaf<i64, [{
+ return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
+}]>;
+
class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
return isInlineImmediate(N);
}]>;
}
def SI_INIT_EXEC : SPseudoInstSI <
- (outs), (ins i64imm:$src), []> {
+ (outs), (ins i64imm:$src),
+ [(int_amdgcn_init_exec (i64 timm:$src))]> {
let Defs = [EXEC];
let usesCustomInserter = 1;
let isAsCheapAsAMove = 1;
let WaveSizePredicate = isWave32;
}
+// FIXME: Wave32 version
def SI_INIT_EXEC_FROM_INPUT : SPseudoInstSI <
- (outs), (ins SSrc_b32:$input, i32imm:$shift), []> {
+ (outs), (ins SSrc_b32:$input, i32imm:$shift),
+ [(int_amdgcn_init_exec_from_input i32:$input, (i32 timm:$shift))]> {
let Defs = [EXEC];
let usesCustomInserter = 1;
}
+def : GCNPat <
+ (int_amdgcn_init_exec timm:$src),
+ (SI_INIT_EXEC_LO (as_i32imm imm:$src))> {
+ let WaveSizePredicate = isWave32;
+}
+
// Return for returning shaders to a shader variant epilog.
def SI_RETURN_TO_EPILOG : SPseudoInstSI <
(outs), (ins variable_ops), [(AMDGPUreturn_to_epilog)]> {
(SI_PC_ADD_REL_OFFSET $ptr_lo, (i32 0))
>;
-def : GCNPat <
- (int_amdgcn_init_exec i64:$src),
- (SI_INIT_EXEC (as_i64imm $src))> {
- let WaveSizePredicate = isWave64;
-}
-
-def : GCNPat <
- (int_amdgcn_init_exec i64:$src),
- (SI_INIT_EXEC_LO (as_i32imm $src))> {
- let WaveSizePredicate = isWave32;
-}
-
-def : GCNPat <
- (int_amdgcn_init_exec_from_input i32:$input, i32:$shift),
- (SI_INIT_EXEC_FROM_INPUT (i32 $input), (as_i32imm $shift))
->;
-
def : GCNPat<
(AMDGPUtrap timm:$trapid),
(S_TRAP $trapid)
--- /dev/null
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.ll | FileCheck -check-prefix=GCN %S/../llvm.amdgcn.init.exec.ll
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.ll | FileCheck -check-prefix=GCN %S/../llvm.amdgcn.init.exec.ll
--- /dev/null
+; Runs original SDAG test with -global-isel
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %S/../llvm.amdgcn.init.exec.wave32.ll | FileCheck -check-prefixes=GCN,GFX1032 %S/../llvm.amdgcn.init.exec.wave32.ll
-;RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s --check-prefix=GCN
+; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck %s --check-prefix=GCN
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; GCN-LABEL: {{^}}full_mask:
; GCN: s_mov_b64 exec, -1
; GCN: s_bfm_b64 exec, s1, 0
; GCN: s_cmp_eq_u32 s1, 64
; GCN: s_cmov_b64 exec, -1
-; GCN: v_add_u32_e32 v0, s0, v0
+; GCN: v_add{{(_nc)?}}_u32_e32 v0, s0, v0
define amdgpu_ps float @reuse_input(i32 inreg %count, i32 %a) {
main_body:
call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 19)
; GCN: s_bfm_b64 exec, s1, 0
; GCN: s_cmp_eq_u32 s1, 64
; GCN: s_cmov_b64 exec, -1
-; GCN: v_add_u32_e32 v0, s0, v0
+; GCN: v_add{{(_nc)?}}_u32_e32 v0, s0, v0
define amdgpu_ps float @reuse_input2(i32 inreg %count, i32 %a) {
main_body:
%s = add i32 %a, %count
--- /dev/null
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX1032 %s
+
+; GCN-LABEL: {{^}}test_init_exec:
+; GFX1032: s_mov_b32 exec_lo, 0x12345
+; GFX1064: s_mov_b64 exec, 0x12345
+; GCN: v_add_f32_e32 v0,
+define amdgpu_ps float @test_init_exec(float %a, float %b) {
+main_body:
+ %s = fadd float %a, %b
+ call void @llvm.amdgcn.init.exec(i64 74565)
+ ret float %s
+}
+
+; GCN-LABEL: {{^}}test_init_exec_from_input:
+; GCN: s_bfe_u32 s0, s3, 0x70008
+; GFX1032: s_bfm_b32 exec_lo, s0, 0
+; GFX1032: s_cmp_eq_u32 s0, 32
+; GFX1032: s_cmov_b32 exec_lo, -1
+; GFX1064: s_bfm_b64 exec, s0, 0
+; GFX1064: s_cmp_eq_u32 s0, 64
+; GFX1064: s_cmov_b64 exec, -1
+; GCN: v_add_f32_e32 v0,
+define amdgpu_ps float @test_init_exec_from_input(i32 inreg, i32 inreg, i32 inreg, i32 inreg %count, float %a, float %b) {
+main_body:
+ %s = fadd float %a, %b
+ call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 8)
+ ret float %s
+}
+
+declare void @llvm.amdgcn.init.exec(i64)
+declare void @llvm.amdgcn.init.exec.from.input(i32, i32)
ret void
}
-; GCN-LABEL: {{^}}test_init_exec:
-; GFX1032: s_mov_b32 exec_lo, 0x12345
-; GFX1064: s_mov_b64 exec, 0x12345
-; GCN: v_add_f32_e32 v0,
-define amdgpu_ps float @test_init_exec(float %a, float %b) {
-main_body:
- %s = fadd float %a, %b
- call void @llvm.amdgcn.init.exec(i64 74565)
- ret float %s
-}
-
-; GCN-LABEL: {{^}}test_init_exec_from_input:
-; GCN: s_bfe_u32 s0, s3, 0x70008
-; GFX1032: s_bfm_b32 exec_lo, s0, 0
-; GFX1032: s_cmp_eq_u32 s0, 32
-; GFX1032: s_cmov_b32 exec_lo, -1
-; GFX1064: s_bfm_b64 exec, s0, 0
-; GFX1064: s_cmp_eq_u32 s0, 64
-; GFX1064: s_cmov_b64 exec, -1
-; GCN: v_add_f32_e32 v0,
-define amdgpu_ps float @test_init_exec_from_input(i32 inreg, i32 inreg, i32 inreg, i32 inreg %count, float %a, float %b) {
-main_body:
- %s = fadd float %a, %b
- call void @llvm.amdgcn.init.exec.from.input(i32 %count, i32 8)
- ret float %s
-}
-
; GCN-LABEL: {{^}}test_vgprblocks_w32_attr:
; Test that the wave size can be overridden in function attributes and that the block size is correct as a result
; GFX10DEFWAVE: ; VGPRBlocks: 1
declare void @llvm.amdgcn.kill(i1)
declare i1 @llvm.amdgcn.wqm.vote(i1)
declare i1 @llvm.amdgcn.ps.live()
-declare void @llvm.amdgcn.init.exec(i64)
-declare void @llvm.amdgcn.init.exec.from.input(i32, i32)
declare i64 @llvm.cttz.i64(i64, i1)
declare i32 @llvm.cttz.i32(i32, i1)