  NODE_NAME_CASE(ATOMIC_CMP_SWAP)
  NODE_NAME_CASE(ATOMIC_INC)
  NODE_NAME_CASE(ATOMIC_DEC)
+  NODE_NAME_CASE(BUFFER_LOAD)
+  NODE_NAME_CASE(BUFFER_LOAD_FORMAT)
  case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
  }
  return nullptr;
  ATOMIC_CMP_SWAP,
  ATOMIC_INC,
  ATOMIC_DEC,
+  BUFFER_LOAD,
+  BUFFER_LOAD_FORMAT,
  LAST_AMDGPU_ISD_NUMBER
};
>;
}
-defm : MUBUF_LoadIntrinsicPat<int_amdgcn_buffer_load_format, f32, "BUFFER_LOAD_FORMAT_X">;
-defm : MUBUF_LoadIntrinsicPat<int_amdgcn_buffer_load_format, v2f32, "BUFFER_LOAD_FORMAT_XY">;
-defm : MUBUF_LoadIntrinsicPat<int_amdgcn_buffer_load_format, v4f32, "BUFFER_LOAD_FORMAT_XYZW">;
-defm : MUBUF_LoadIntrinsicPat<int_amdgcn_buffer_load, f32, "BUFFER_LOAD_DWORD">;
-defm : MUBUF_LoadIntrinsicPat<int_amdgcn_buffer_load, v2f32, "BUFFER_LOAD_DWORDX2">;
-defm : MUBUF_LoadIntrinsicPat<int_amdgcn_buffer_load, v4f32, "BUFFER_LOAD_DWORDX4">;
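+// Select through the new BUFFER_LOAD/BUFFER_LOAD_FORMAT nodes instead of the
+// raw intrinsics; LowerINTRINSIC_W_CHAIN lowers the intrinsics to these nodes
+// so that the selected instructions carry a MachineMemOperand.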
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, f32, "BUFFER_LOAD_FORMAT_X">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v2f32, "BUFFER_LOAD_FORMAT_XY">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v4f32, "BUFFER_LOAD_FORMAT_XYZW">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, f32, "BUFFER_LOAD_DWORD">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2f32, "BUFFER_LOAD_DWORDX2">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4f32, "BUFFER_LOAD_DWORDX4">;
multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
                                   string opcode> {
SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                 SelectionDAG &DAG) const {
  unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+  SDLoc DL(Op);
  switch (IntrID) {
  case Intrinsic::amdgcn_atomic_inc:
  case Intrinsic::amdgcn_atomic_dec: {
    return DAG.getMemIntrinsicNode(Opc, SDLoc(Op), M->getVTList(), Ops,
                                   M->getMemoryVT(), M->getMemOperand());
  }
+  case Intrinsic::amdgcn_buffer_load:
+  case Intrinsic::amdgcn_buffer_load_format: {
+    SDValue Ops[] = {
+      Op.getOperand(0), // Chain
+      Op.getOperand(2), // rsrc
+      Op.getOperand(3), // vindex
+      Op.getOperand(4), // offset
+      Op.getOperand(5), // glc
+      Op.getOperand(6)  // slc
+    };
+    MachineFunction &MF = DAG.getMachineFunction();
+    SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+    unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ?
+        AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT;
+    EVT VT = Op.getValueType();
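+    // The node's memory VT is the integer type with the same width as the
+    // result, so the access is recorded as a load of raw bits.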
+    EVT IntVT = VT.changeTypeToInteger();
+
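+    // Build the memory operand against the buffer pseudo source value so the
+    // machine-level alias analysis can tell buffer accesses apart from LDS
+    // and ordinary pointer accesses (see the buffer_load_mmo test below).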
+    MachineMemOperand *MMO = MF.getMachineMemOperand(
+        MachinePointerInfo(MFI->getBufferPSV()),
+        MachineMemOperand::MOLoad,
+        VT.getStoreSize(), VT.getStoreSize());
+
+    return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, MMO);
+  }
  default:
    return SDValue();
  }
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;
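+// Profile for the new buffer load nodes: one result (vdata) and operands
+// matching the five arguments of the llvm.amdgcn.buffer.load* intrinsics.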
+def SDTBufferLoad : SDTypeProfile<1, 5,
+    [                      // vdata
+     SDTCisVT<1, v4i32>,   // rsrc
+     SDTCisVT<2, i32>,     // vindex
+     SDTCisVT<3, i32>,     // offset
+     SDTCisVT<4, i1>,      // glc
+     SDTCisVT<5, i1>]>;    // slc
+
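+// SDNPMemOperand lets these nodes carry the MachineMemOperand created in
+// LowerINTRINSIC_W_CHAIN.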
+def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
+                            [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
+def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
+                                   [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
+
def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT",
  SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i16>,
                       SDTCisVT<3, i32>]>
}
};
+class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
+public:
+  explicit AMDGPUBufferPseudoSourceValue() :
+    PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
+
+  bool isConstant(const MachineFrameInfo *) const override {
+    // This should probably be true for most buffers, but we will start by
+    // being conservative.
+    return false;
+  }
+
+  bool isAliased(const MachineFrameInfo *) const override {
+    // FIXME: If we ever change buffer intrinsics to accept fat pointers,
+    // then this could be true for some cases.
+    return false;
+  }
+
+  bool mayAlias(const MachineFrameInfo *) const override {
+    // FIXME: If we ever change buffer intrinsics to accept fat pointers,
+    // then this could be true for some cases.
+    return false;
+  }
+};
/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
  // Stack object indices for work item IDs.
  std::array<int, 3> DebuggerWorkItemIDStackObjectIndices;
+  AMDGPUBufferPseudoSourceValue BufferPSV;
  std::unique_ptr<AMDGPUImagePseudoSourceValue> ImagePSV;
public:
    llvm_unreachable("unexpected dimension");
  }
+  const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
+    return &BufferPSV;
+  }
+
  AMDGPUImagePseudoSourceValue *getImagePSV() {
    return ImagePSV.get();
  }
;SICI: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[0:3], 0 offen
;VI-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], 61 offset:4095
;VI-DAG: s_movk_i32 [[OFS1:s[0-9]+]], 0x7fff
-;VI: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS1]] offset:4093
+;VI-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS1]] offset:4093
;SICI: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, {{v[0-9]+}}, s[0:3], 0 offen
-;VI: s_mov_b32 [[OFS2:s[0-9]+]], 0x8fff
-;VI: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS2]] offset:1
+;VI-DAG: s_mov_b32 [[OFS2:s[0-9]+]], 0x8fff
+;VI-DAG: buffer_load_format_xyzw {{v\[[0-9]+:[0-9]+\]}}, off, s[0:3], [[OFS2]] offset:1
;CHECK: s_waitcnt
define amdgpu_ps <4 x float> @buffer_load_immoffs_large(<4 x i32> inreg) {
main_body:
ret <4 x float> %data
}
+; SI won't merge ds memory operations because of the signed offset bug, so we
+; only have check lines for VI. The ds_write2 merge across the buffer load is
+; only possible because the load's memory operand shows it cannot alias LDS.
+; CHECK-LABEL: buffer_load_mmo:
+; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
+; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
+define amdgpu_ps float @buffer_load_mmo(<4 x i32> inreg %rsrc, float addrspace(3)* %lds) {
+entry:
+ store float 0.0, float addrspace(3)* %lds
+ %val = call float @llvm.amdgcn.buffer.load.f32(<4 x i32> %rsrc, i32 0, i32 0, i1 0, i1 0)
+ %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
+ store float 0.0, float addrspace(3)* %tmp2
+ ret float %val
+}
+
declare float @llvm.amdgcn.buffer.load.f32(<4 x i32>, i32, i32, i1, i1) #0
declare <2 x float> @llvm.amdgcn.buffer.load.v2f32(<4 x i32>, i32, i32, i1, i1) #0
declare <4 x float> @llvm.amdgcn.buffer.load.v4f32(<4 x i32>, i32, i32, i1, i1) #0