GCCBuiltin<"__builtin_amdgcn_ds_bpermute">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
+
+//===----------------------------------------------------------------------===//
+// Special Intrinsics for backend internal use only. No frontend
+// should emit calls to these.
+//===----------------------------------------------------------------------===//
+def int_amdgcn_if : Intrinsic<[llvm_i1_ty, llvm_i64_ty], // results: i1, i64
+ [llvm_i1_ty], [IntrConvergent] // isCFIntrinsic maps this to AMDGPUISD::IF
+>;
+
+def int_amdgcn_else : Intrinsic<[llvm_i1_ty, llvm_i64_ty], // results: i1, i64
+ [llvm_i64_ty], [IntrConvergent] // isCFIntrinsic maps this to AMDGPUISD::ELSE
+>;
+
+def int_amdgcn_break : Intrinsic<[llvm_i64_ty], // result: i64
+ [llvm_i64_ty], [IntrNoMem, IntrConvergent] // isCFIntrinsic returns 0 for this
+>;
+
+def int_amdgcn_if_break : Intrinsic<[llvm_i64_ty], // result: i64
+ [llvm_i1_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent] // not a branch cond
+>;
+
+def int_amdgcn_else_break : Intrinsic<[llvm_i64_ty], // result: i64
+ [llvm_i64_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent] // not a branch cond
+>;
+
+def int_amdgcn_loop : Intrinsic<[llvm_i1_ty], // result: i1
+ [llvm_i64_ty], [IntrConvergent] // isCFIntrinsic maps this to AMDGPUISD::LOOP
+>;
+
+def int_amdgcn_end_cf : Intrinsic<[], [llvm_i64_ty], [IntrConvergent]>;
+
+// Emit 2.5 ulp, no denormal division. Should only be inserted by
+// a pass based on !fpmath metadata.
+def int_amdgcn_fdiv_fast : Intrinsic<
+ [llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]
+>;
}
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
-#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/ADT/StringRef.h"
Builder.setFastMathFlags(FMF);
Builder.SetCurrentDebugLocation(FDiv.getDebugLoc());
- const AMDGPUIntrinsicInfo *II = TM->getIntrinsicInfo();
- Function *Decl
- = II->getDeclaration(Mod, AMDGPUIntrinsic::amdgcn_fdiv_fast, {});
+ Function *Decl = Intrinsic::getDeclaration(Mod, Intrinsic::amdgcn_fdiv_fast);
Value *Num = FDiv.getOperand(0);
Value *Den = FDiv.getOperand(1);
NODE_NAME_CASE(BRANCH_COND);
// AMDGPU DAG nodes
+ NODE_NAME_CASE(IF)
+ NODE_NAME_CASE(ELSE)
+ NODE_NAME_CASE(LOOP)
NODE_NAME_CASE(ENDPGM)
NODE_NAME_CASE(RETURN)
NODE_NAME_CASE(DWORDADDR)
UMUL, // 32bit unsigned multiplication
BRANCH_COND,
// End AMDIL ISD Opcodes
+
+ // Masked control flow nodes.
+ IF,
+ ELSE,
+ LOOP,
+
ENDPGM,
RETURN,
DWORDADDR,
def AMDGPUKillSDT : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def AMDGPUIfOp : SDTypeProfile<1, 2, // 1 result, 2 operands
+ [SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, OtherVT>] // i64 = (i1, bb)
+>;
+
+def AMDGPUElseOp : SDTypeProfile<1, 2, // 1 result, 2 operands
+ [SDTCisVT<0, i64>, SDTCisVT<1, i64>, SDTCisVT<2, OtherVT>] // i64 = (i64, bb)
+>;
+
+def AMDGPULoopOp : SDTypeProfile<0, 2, // no result, 2 operands
+ [SDTCisVT<0, i64>, SDTCisVT<1, OtherVT>] // (i64, bb)
+>;
+
+def AMDGPUBreakOp : SDTypeProfile<1, 1, // NOTE(review): unused in this patch?
+ [SDTCisVT<0, i64>, SDTCisVT<1, i64>] // i64 = (i64)
+>;
+
+def AMDGPUIfBreakOp : SDTypeProfile<1, 2, // NOTE(review): unused in this patch?
+ [SDTCisVT<0, i64>, SDTCisVT<1, i1>, SDTCisVT<2, i64>] // i64 = (i1, i64)
+>;
+
+def AMDGPUElseBreakOp : SDTypeProfile<1, 2, // NOTE(review): unused in patch?
+ [SDTCisVT<0, i64>, SDTCisVT<1, i64>, SDTCisVT<2, i64>] // i64 = (i64, i64)
+>;
+
//===----------------------------------------------------------------------===//
// AMDGPU DAG Nodes
//
+def AMDGPUif : SDNode<"AMDGPUISD::IF", AMDGPUIfOp, [SDNPHasChain]>;
+def AMDGPUelse : SDNode<"AMDGPUISD::ELSE", AMDGPUElseOp, [SDNPHasChain]>;
+def AMDGPUloop : SDNode<"AMDGPUISD::LOOP", AMDGPULoopOp, [SDNPHasChain]>;
+
def AMDGPUconstdata_ptr : SDNode<
"AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 1, [SDTCisVT<0, iPTR>,
SDTCisVT<0, iPTR>]>
ArrayRef<Type*> Tys) const {
// FIXME: Re-use Intrinsic::getType machinery
switch (ID) {
- case AMDGPUIntrinsic::amdgcn_fdiv_fast: {
- Type *F32Ty = Type::getFloatTy(Context);
- return FunctionType::get(F32Ty, { F32Ty, F32Ty }, false);
- }
default:
llvm_unreachable("unhandled intrinsic");
}
typedef std::pair<BasicBlock *, Value *> StackEntry;
typedef SmallVector<StackEntry, 16> StackVector;
-// Intrinsic names the control flow is annotated with
-static const char *const IfIntrinsic = "llvm.amdgcn.if";
-static const char *const ElseIntrinsic = "llvm.amdgcn.else";
-static const char *const BreakIntrinsic = "llvm.amdgcn.break";
-static const char *const IfBreakIntrinsic = "llvm.amdgcn.if.break";
-static const char *const ElseBreakIntrinsic = "llvm.amdgcn.else.break";
-static const char *const LoopIntrinsic = "llvm.amdgcn.loop";
-static const char *const EndCfIntrinsic = "llvm.amdgcn.end.cf";
-
class SIAnnotateControlFlow : public FunctionPass {
DivergenceAnalysis *DA;
UndefValue *BoolUndef;
Constant *Int64Zero;
- Constant *If;
- Constant *Else;
- Constant *Break;
- Constant *IfBreak;
- Constant *ElseBreak;
- Constant *Loop;
- Constant *EndCf;
+ Function *If;
+ Function *Else;
+ Function *Break;
+ Function *IfBreak;
+ Function *ElseBreak;
+ Function *Loop;
+ Function *EndCf;
DominatorTree *DT;
StackVector Stack;
BoolUndef = UndefValue::get(Boolean);
Int64Zero = ConstantInt::get(Int64, 0);
- If = M.getOrInsertFunction(
- IfIntrinsic, ReturnStruct, Boolean, (Type *)nullptr);
-
- Else = M.getOrInsertFunction(
- ElseIntrinsic, ReturnStruct, Int64, (Type *)nullptr);
-
- Break = M.getOrInsertFunction(
- BreakIntrinsic, Int64, Int64, (Type *)nullptr);
- cast<Function>(Break)->setDoesNotAccessMemory();
-
- IfBreak = M.getOrInsertFunction(
- IfBreakIntrinsic, Int64, Boolean, Int64, (Type *)nullptr);
- cast<Function>(IfBreak)->setDoesNotAccessMemory();;
-
- ElseBreak = M.getOrInsertFunction(
- ElseBreakIntrinsic, Int64, Int64, Int64, (Type *)nullptr);
- cast<Function>(ElseBreak)->setDoesNotAccessMemory();
-
- Loop = M.getOrInsertFunction(
- LoopIntrinsic, Boolean, Int64, (Type *)nullptr);
-
- EndCf = M.getOrInsertFunction(
- EndCfIntrinsic, Void, Int64, (Type *)nullptr);
-
+ If = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if);
+ Else = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else);
+ Break = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_break);
+ IfBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_if_break);
+ ElseBreak = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_else_break);
+ Loop = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_loop);
+ EndCf = Intrinsic::getDeclaration(&M, Intrinsic::amdgcn_end_cf);
return false;
}
return nullptr;
}
-bool SITargetLowering::isCFIntrinsic(const SDNode *Intr) const {
+unsigned SITargetLowering::isCFIntrinsic(const SDNode *Intr) const {
if (Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
switch (cast<ConstantSDNode>(Intr->getOperand(1))->getZExtValue()) {
- case AMDGPUIntrinsic::amdgcn_if:
- case AMDGPUIntrinsic::amdgcn_else:
- case AMDGPUIntrinsic::amdgcn_end_cf:
- case AMDGPUIntrinsic::amdgcn_loop:
- return true;
- default:
- return false;
- }
- }
-
- if (Intr->getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
- switch (cast<ConstantSDNode>(Intr->getOperand(0))->getZExtValue()) {
- case AMDGPUIntrinsic::amdgcn_break:
- case AMDGPUIntrinsic::amdgcn_if_break:
- case AMDGPUIntrinsic::amdgcn_else_break:
- return true;
+ case Intrinsic::amdgcn_if:
+ return AMDGPUISD::IF;
+ case Intrinsic::amdgcn_else:
+ return AMDGPUISD::ELSE;
+ case Intrinsic::amdgcn_loop:
+ return AMDGPUISD::LOOP;
+ case Intrinsic::amdgcn_end_cf:
+ llvm_unreachable("should not occur");
default:
- return false;
+ return 0;
}
}
- return false;
+ // break, if_break, else_break are all only used as inputs to loop, not
+ // directly as branch conditions.
+ return 0;
}
void SITargetLowering::createDebuggerPrologueStackObjects(
// eg: i1,ch = llvm.amdgcn.loop t0, TargetConstant:i32<6271>, t3
// => t9: ch = llvm.amdgcn.loop t0, TargetConstant:i32<6271>, t3, BasicBlock:ch<bb1 0x7fee5286d088>
- if (!isCFIntrinsic(Intr)) {
+ unsigned CFNode = isCFIntrinsic(Intr);
+ if (CFNode == 0) {
// This is a uniform branch so we don't need to legalize.
return BRCOND;
}
if (HaveChain)
Ops.push_back(BRCOND.getOperand(0));
- Ops.append(Intr->op_begin() + (HaveChain ? 1 : 0), Intr->op_end());
+ Ops.append(Intr->op_begin() + (HaveChain ? 2 : 1), Intr->op_end());
Ops.push_back(Target);
ArrayRef<EVT> Res(Intr->value_begin() + 1, Intr->value_end());
// build the new intrinsic call
- SDNode *Result = DAG.getNode(
- Res.size() > 1 ? ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL,
- DAG.getVTList(Res), Ops).getNode();
+ SDNode *Result = DAG.getNode(CFNode, DL, DAG.getVTList(Res), Ops).getNode();
if (!HaveChain) {
SDValue Ops[] = {
return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
Op->getVTList(), Ops, VT, MMO);
}
- case AMDGPUIntrinsic::amdgcn_fdiv_fast:
+ case Intrinsic::amdgcn_fdiv_fast:
return lowerFDIV_FAST(Op, DAG);
case AMDGPUIntrinsic::SI_vs_load_input:
return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
bool isLegalFlatAddressingMode(const AddrMode &AM) const;
bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
- bool isCFIntrinsic(const SDNode *Intr) const;
+ unsigned isCFIntrinsic(const SDNode *Intr) const;
void createDebuggerPrologueStackObjects(MachineFunction &MF) const;
def SI_IF: CFPseudoInstSI <
(outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target),
- [(set i64:$dst, (int_amdgcn_if i1:$vcc, bb:$target))], 1, 1> {
+ [(set i64:$dst, (AMDGPUif i1:$vcc, bb:$target))], 1, 1> {
let Constraints = "";
let Size = 12;
- let mayLoad = 1;
- let mayStore = 1;
let hasSideEffects = 1;
}
def SI_ELSE : CFPseudoInstSI <
- (outs SReg_64:$dst), (ins SReg_64:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> {
+ (outs SReg_64:$dst),
+ (ins SReg_64:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> {
let Constraints = "$src = $dst";
let Size = 12;
- let mayStore = 1;
- let mayLoad = 1;
let hasSideEffects = 1;
}
def SI_LOOP : CFPseudoInstSI <
(outs), (ins SReg_64:$saved, brtarget:$target),
- [(int_amdgcn_loop i64:$saved, bb:$target)], 1, 1> {
+ [(AMDGPUloop i64:$saved, bb:$target)], 1, 1> {
let Size = 8;
- let isBranch = 1;
+ let isBranch = 0;
let hasSideEffects = 1;
- let mayLoad = 1;
- let mayStore = 1;
}
-} // End isBranch = 1, isTerminator = 1
+} // End isTerminator = 1
def SI_END_CF : CFPseudoInstSI <
(outs), (ins SReg_64:$saved),
let Size = 4;
let isAsCheapAsAMove = 1;
let isReMaterializable = 1;
- let mayLoad = 1;
- let mayStore = 1;
let hasSideEffects = 1;
+ let mayLoad = 1; // FIXME: Should not need memory flags
+ let mayStore = 1;
}
def SI_BREAK : CFPseudoInstSI <
>;
def : Pat<
- (int_amdgcn_else i64:$src, bb:$target),
+ (AMDGPUelse i64:$src, bb:$target),
(SI_ELSE $src, $target, 0)
>;
def int_SI_image_load_mip : Image;
def int_SI_getresinfo : Image;
} // End TargetPrefix = "SI", isTarget = 1
-
-let TargetPrefix = "amdgcn", isTarget = 1 in {
- // Emit 2.5 ulp, no denormal division. Should only be inserted by
- // pass based on !fpmath metadata.
- def int_amdgcn_fdiv_fast : Intrinsic<
- [llvm_float_ty], [llvm_float_ty], [IntrNoMem]
- >;
-
- /* Control flow Intrinsics */
-
- def int_amdgcn_if : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_empty_ty], [IntrConvergent]>;
- def int_amdgcn_else : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_empty_ty], [IntrConvergent]>;
- def int_amdgcn_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem, IntrConvergent]>;
- def int_amdgcn_if_break : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]>;
- def int_amdgcn_else_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]>;
- def int_amdgcn_loop : Intrinsic<[], [llvm_i64_ty, llvm_empty_ty], [IntrConvergent]>;
- def int_amdgcn_end_cf : Intrinsic<[], [llvm_i64_ty], [IntrConvergent]>;
-}