From a6448ac97c5b18f940055501152a06fdfd875997 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Fri, 29 Dec 2017 17:18:14 +0000 Subject: [PATCH] AMDGPU: Implement getTgtMemIntrinsic for images Currently all images are lowered to have a single image PseudoSourceValue. Image stores happen to have overly strict mayLoad/mayStore/hasSideEffects flags set on them, so this happens to work. When these are fixed to be correct, the scheduler breaks this because the identical PSVs are assumed to be the same address. These need to be unique to the image resource value. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@321555 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsAMDGPU.td | 12 +- lib/Target/AMDGPU/SIISelLowering.cpp | 169 ++++++++++++++++++-- lib/Target/AMDGPU/SIMachineFunctionInfo.cpp | 1 - lib/Target/AMDGPU/SIMachineFunctionInfo.h | 19 ++- 4 files changed, 176 insertions(+), 25 deletions(-) diff --git a/include/llvm/IR/IntrinsicsAMDGPU.td b/include/llvm/IR/IntrinsicsAMDGPU.td index cc08fe68327..bd570c86b72 100644 --- a/include/llvm/IR/IntrinsicsAMDGPU.td +++ b/include/llvm/IR/IntrinsicsAMDGPU.td @@ -304,7 +304,8 @@ class AMDGPUImageLoad : Intrinsic < llvm_i1_ty, // slc(imm) llvm_i1_ty, // lwe(imm) llvm_i1_ty], // da(imm) - !if(NoMem, [IntrNoMem], [IntrReadMem])>; + !if(NoMem, [IntrNoMem], [IntrReadMem]), "", + !if(NoMem, [], [SDNPMemOperand])>; def int_amdgcn_image_load : AMDGPUImageLoad; def int_amdgcn_image_load_mip : AMDGPUImageLoad; @@ -320,7 +321,7 @@ class AMDGPUImageStore : Intrinsic < llvm_i1_ty, // slc(imm) llvm_i1_ty, // lwe(imm) llvm_i1_ty], // da(imm) - []>; + [IntrWriteMem], "", [SDNPMemOperand]>; def int_amdgcn_image_store : AMDGPUImageStore; def int_amdgcn_image_store_mip : AMDGPUImageStore; @@ -336,7 +337,8 @@ class AMDGPUImageSample : Intrinsic < llvm_i1_ty, // slc(imm) llvm_i1_ty, // lwe(imm) llvm_i1_ty], // da(imm) - !if(NoMem, [IntrNoMem], [IntrReadMem])>; + !if(NoMem, [IntrNoMem], [IntrReadMem]), "", + !if(NoMem, [], [SDNPMemOperand])>; // Basic sample def int_amdgcn_image_sample : AMDGPUImageSample; @@ -428,7 +430,7 @@ class AMDGPUImageAtomic : Intrinsic < llvm_i1_ty, // r128(imm) llvm_i1_ty, // da(imm) llvm_i1_ty], // slc(imm) - []>; + [], "", [SDNPMemOperand]>; def int_amdgcn_image_atomic_swap : AMDGPUImageAtomic; def int_amdgcn_image_atomic_add : AMDGPUImageAtomic; @@ -451,7 +453,7 @@ def int_amdgcn_image_atomic_cmpswap : Intrinsic < llvm_i1_ty, // r128(imm) llvm_i1_ty, // da(imm) llvm_i1_ty], // slc(imm) - []>; + [], "", [SDNPMemOperand]>; class AMDGPUBufferLoad : Intrinsic < [llvm_anyfloat_ty], diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 50ee88fa635..9c96fb8737d 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -575,6 +575,157 @@ bool SITargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, return true; } + + // Image load. + case Intrinsic::amdgcn_image_load: + case Intrinsic::amdgcn_image_load_mip: + + // Sample. + case Intrinsic::amdgcn_image_sample: + case Intrinsic::amdgcn_image_sample_cl: + case Intrinsic::amdgcn_image_sample_d: + case Intrinsic::amdgcn_image_sample_d_cl: + case Intrinsic::amdgcn_image_sample_l: + case Intrinsic::amdgcn_image_sample_b: + case Intrinsic::amdgcn_image_sample_b_cl: + case Intrinsic::amdgcn_image_sample_lz: + case Intrinsic::amdgcn_image_sample_cd: + case Intrinsic::amdgcn_image_sample_cd_cl: + + // Sample with comparison. + case Intrinsic::amdgcn_image_sample_c: + case Intrinsic::amdgcn_image_sample_c_cl: + case Intrinsic::amdgcn_image_sample_c_d: + case Intrinsic::amdgcn_image_sample_c_d_cl: + case Intrinsic::amdgcn_image_sample_c_l: + case Intrinsic::amdgcn_image_sample_c_b: + case Intrinsic::amdgcn_image_sample_c_b_cl: + case Intrinsic::amdgcn_image_sample_c_lz: + case Intrinsic::amdgcn_image_sample_c_cd: + case Intrinsic::amdgcn_image_sample_c_cd_cl: + + // Sample with offsets. + case Intrinsic::amdgcn_image_sample_o: + case Intrinsic::amdgcn_image_sample_cl_o: + case Intrinsic::amdgcn_image_sample_d_o: + case Intrinsic::amdgcn_image_sample_d_cl_o: + case Intrinsic::amdgcn_image_sample_l_o: + case Intrinsic::amdgcn_image_sample_b_o: + case Intrinsic::amdgcn_image_sample_b_cl_o: + case Intrinsic::amdgcn_image_sample_lz_o: + case Intrinsic::amdgcn_image_sample_cd_o: + case Intrinsic::amdgcn_image_sample_cd_cl_o: + + // Sample with comparison and offsets. + case Intrinsic::amdgcn_image_sample_c_o: + case Intrinsic::amdgcn_image_sample_c_cl_o: + case Intrinsic::amdgcn_image_sample_c_d_o: + case Intrinsic::amdgcn_image_sample_c_d_cl_o: + case Intrinsic::amdgcn_image_sample_c_l_o: + case Intrinsic::amdgcn_image_sample_c_b_o: + case Intrinsic::amdgcn_image_sample_c_b_cl_o: + case Intrinsic::amdgcn_image_sample_c_lz_o: + case Intrinsic::amdgcn_image_sample_c_cd_o: + case Intrinsic::amdgcn_image_sample_c_cd_cl_o: + + // Basic gather4 + case Intrinsic::amdgcn_image_gather4: + case Intrinsic::amdgcn_image_gather4_cl: + case Intrinsic::amdgcn_image_gather4_l: + case Intrinsic::amdgcn_image_gather4_b: + case Intrinsic::amdgcn_image_gather4_b_cl: + case Intrinsic::amdgcn_image_gather4_lz: + + // Gather4 with comparison + case Intrinsic::amdgcn_image_gather4_c: + case Intrinsic::amdgcn_image_gather4_c_cl: + case Intrinsic::amdgcn_image_gather4_c_l: + case Intrinsic::amdgcn_image_gather4_c_b: + case Intrinsic::amdgcn_image_gather4_c_b_cl: + case Intrinsic::amdgcn_image_gather4_c_lz: + + // Gather4 with offsets + case Intrinsic::amdgcn_image_gather4_o: + case Intrinsic::amdgcn_image_gather4_cl_o: + case Intrinsic::amdgcn_image_gather4_l_o: + case Intrinsic::amdgcn_image_gather4_b_o: + case Intrinsic::amdgcn_image_gather4_b_cl_o: + case Intrinsic::amdgcn_image_gather4_lz_o: + + // Gather4 with comparison and offsets + case Intrinsic::amdgcn_image_gather4_c_o: + case Intrinsic::amdgcn_image_gather4_c_cl_o: + case Intrinsic::amdgcn_image_gather4_c_l_o: + case Intrinsic::amdgcn_image_gather4_c_b_o: + case Intrinsic::amdgcn_image_gather4_c_b_cl_o: + case Intrinsic::amdgcn_image_gather4_c_lz_o: { + SIMachineFunctionInfo *MFI = MF.getInfo(); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(CI.getType()); + Info.ptrVal = MFI->getImagePSV( + *MF.getSubtarget().getInstrInfo(), + CI.getArgOperand(1)); + Info.align = 0; + Info.flags = MachineMemOperand::MOLoad | + MachineMemOperand::MODereferenceable; + return true; + } + case Intrinsic::amdgcn_image_store: + case Intrinsic::amdgcn_image_store_mip: { + SIMachineFunctionInfo *MFI = MF.getInfo(); + Info.opc = ISD::INTRINSIC_VOID; + Info.memVT = MVT::getVT(CI.getArgOperand(0)->getType()); + Info.ptrVal = MFI->getImagePSV( + *MF.getSubtarget().getInstrInfo(), + CI.getArgOperand(2)); + Info.flags = MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable; + Info.align = 0; + return true; + } + case Intrinsic::amdgcn_image_atomic_swap: + case Intrinsic::amdgcn_image_atomic_add: + case Intrinsic::amdgcn_image_atomic_sub: + case Intrinsic::amdgcn_image_atomic_smin: + case Intrinsic::amdgcn_image_atomic_umin: + case Intrinsic::amdgcn_image_atomic_smax: + case Intrinsic::amdgcn_image_atomic_umax: + case Intrinsic::amdgcn_image_atomic_and: + case Intrinsic::amdgcn_image_atomic_or: + case Intrinsic::amdgcn_image_atomic_xor: + case Intrinsic::amdgcn_image_atomic_inc: + case Intrinsic::amdgcn_image_atomic_dec: { + SIMachineFunctionInfo *MFI = MF.getInfo(); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(CI.getType()); + Info.ptrVal = MFI->getImagePSV( + *MF.getSubtarget().getInstrInfo(), + CI.getArgOperand(2)); + + Info.flags = MachineMemOperand::MOLoad | + MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable; + + // XXX - Should this be volatile without known ordering? + Info.flags |= MachineMemOperand::MOVolatile; + return true; + } + case Intrinsic::amdgcn_image_atomic_cmpswap: { + SIMachineFunctionInfo *MFI = MF.getInfo(); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(CI.getType()); + Info.ptrVal = MFI->getImagePSV( + *MF.getSubtarget().getInstrInfo(), + CI.getArgOperand(3)); + + Info.flags = MachineMemOperand::MOLoad | + MachineMemOperand::MOStore | + MachineMemOperand::MODereferenceable; + + // XXX - Should this be volatile without known ordering? + Info.flags |= MachineMemOperand::MOVolatile; + return true; + } default: return false; } @@ -2946,24 +3097,12 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter( SIMachineFunctionInfo *MFI = MF->getInfo(); if (TII->isMIMG(MI)) { - if (!MI.memoperands_empty()) - return BB; + if (MI.memoperands_empty() && MI.mayLoadOrStore()) { + report_fatal_error("missing mem operand from MIMG instruction"); + } // Add a memoperand for mimg instructions so that they aren't assumed to // be ordered memory instuctions. - MachinePointerInfo PtrInfo(MFI->getImagePSV()); - MachineMemOperand::Flags Flags = MachineMemOperand::MODereferenceable; - if (MI.mayStore()) - Flags |= MachineMemOperand::MOStore; - - if (MI.mayLoad()) - Flags |= MachineMemOperand::MOLoad; - - if (Flags != MachineMemOperand::MODereferenceable) { - auto MMO = MF->getMachineMemOperand(PtrInfo, Flags, 0, 0); - MI.addMemOperand(*MF, MMO); - } - return BB; } diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp index 6013ebc81d9..68acfa4c6ea 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp @@ -29,7 +29,6 @@ using namespace llvm; SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF) : AMDGPUMachineFunction(MF), BufferPSV(*(MF.getSubtarget().getInstrInfo())), - ImagePSV(*(MF.getSubtarget().getInstrInfo())), PrivateSegmentBuffer(false), DispatchPtr(false), QueuePtr(false), diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h index 5dde72910ee..7ac755be04d 100644 --- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h +++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h @@ -34,12 +34,14 @@ namespace llvm { class MachineFrameInfo; class MachineFunction; +class SIInstrInfo; class TargetRegisterClass; class AMDGPUImagePseudoSourceValue : public PseudoSourceValue { public: + // TODO: Is the img rsrc useful? explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) : - PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { } + PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {} bool isConstant(const MachineFrameInfo *) const override { // This should probably be true for most images, but we will start by being @@ -136,7 +138,10 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction { std::array DebuggerWorkItemIDStackObjectIndices = {{0, 0, 0}}; AMDGPUBufferPseudoSourceValue BufferPSV; - AMDGPUImagePseudoSourceValue ImagePSV; + + DenseMap> ImagePSVs; + private: unsigned LDSWaveSpillSize = 0; @@ -629,12 +634,18 @@ public: return LDSWaveSpillSize; } + // FIXME: These should be unique const AMDGPUBufferPseudoSourceValue *getBufferPSV() const { return &BufferPSV; } - const AMDGPUImagePseudoSourceValue *getImagePSV() const { - return &ImagePSV; + const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII, + const Value *ImgRsrc) { + assert(ImgRsrc); + auto PSV = ImagePSVs.try_emplace( + ImgRsrc, + llvm::make_unique(TII)); + return PSV.first->second.get(); } }; -- 2.40.0