From: Matt Arsenault Date: Mon, 27 Feb 2017 22:15:25 +0000 (+0000) Subject: AMDGPU: Support v2i16/v2f16 packed operations X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=27f4f2f4bcfbdbb871e4b8c49e3c35af6ab26c50;p=llvm AMDGPU: Support v2i16/v2f16 packed operations git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@296396 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp index 23f124b637f..0652dacd9b0 100644 --- a/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp +++ b/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp @@ -181,12 +181,20 @@ bool AMDGPUCodeGenPrepare::isSigned(const SelectInst &I) const { } bool AMDGPUCodeGenPrepare::needsPromotionToI32(const Type *T) const { - if (T->isIntegerTy() && T->getIntegerBitWidth() > 1 && - T->getIntegerBitWidth() <= 16) + const IntegerType *IntTy = dyn_cast(T); + if (IntTy && IntTy->getBitWidth() > 1 && IntTy->getBitWidth() <= 16) return true; - if (!T->isVectorTy()) - return false; - return needsPromotionToI32(cast(T)->getElementType()); + + if (const VectorType *VT = dyn_cast(T)) { + // TODO: The set of packed operations is more limited, so may want to + // promote some anyway. + if (ST->hasVOP3PInsts()) + return false; + + return needsPromotionToI32(VT->getElementType()); + } + + return false; } // Return true if the op promoted to i32 should have nsw set. diff --git a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index e02ced04f08..fddf94339a1 100644 --- a/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -159,6 +159,10 @@ private: SDValue &Clamp, SDValue &Omod) const; + bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; + bool SelectVOP3PMods0(SDValue In, SDValue &Src, SDValue &SrcMods, + SDValue &Clamp) const; + void SelectADD_SUB_I64(SDNode *N); void SelectUADDO_USUBO(SDNode *N); void SelectDIV_SCALE(SDNode *N); @@ -305,6 +309,20 @@ static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) { llvm_unreachable("invalid vector size"); } +static bool getConstantValue(SDValue N, uint32_t &Out) { + if (const ConstantSDNode *C = dyn_cast(N)) { + Out = C->getAPIntValue().getZExtValue(); + return true; + } + + if (const ConstantFPSDNode *C = dyn_cast(N)) { + Out = C->getValueAPF().bitcastToAPInt().getZExtValue(); + return true; + } + + return false; +} + void AMDGPUDAGToDAGISel::Select(SDNode *N) { unsigned int Opc = N->getOpcode(); if (N->isMachineOpcode()) { @@ -356,7 +374,24 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { EVT VT = N->getValueType(0); unsigned NumVectorElts = VT.getVectorNumElements(); EVT EltVT = VT.getVectorElementType(); + + if (VT == MVT::v2i16 || VT == MVT::v2f16) { + if (Opc == ISD::BUILD_VECTOR) { + uint32_t LHSVal, RHSVal; + if (getConstantValue(N->getOperand(0), LHSVal) && + getConstantValue(N->getOperand(1), RHSVal)) { + uint32_t K = LHSVal | (RHSVal << 16); + CurDAG->SelectNodeTo(N, AMDGPU::S_MOV_B32, VT, + CurDAG->getTargetConstant(K, SDLoc(N), MVT::i32)); + return; + } + } + + break; + } + assert(EltVT.bitsEq(MVT::i32)); + if (Subtarget->getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { RegClassID = selectSGPRVectorRegClassID(NumVectorElts); } else { @@ -1565,7 +1600,6 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) { bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const { unsigned Mods = 0; - Src = In; if (Src.getOpcode() == ISD::FNEG) { @@ -1579,7 +1613,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, } SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); - return true; } @@ -1633,6 +1666,38 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src, return SelectVOP3Mods(In, Src, SrcMods); } +bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src, + SDValue &SrcMods) const { + unsigned Mods = 0; + Src = In; + + // FIXME: Look for on separate components + if (Src.getOpcode() == ISD::FNEG) { + Mods |= (SISrcMods::NEG | SISrcMods::NEG_HI); + Src = Src.getOperand(0); + } + + // Packed instructions do not have abs modifiers. + + // FIXME: Handle abs/neg of individual components. + // FIXME: Handle swizzling with op_sel + Mods |= SISrcMods::OP_SEL_1; + + SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); + return true; +} + +bool AMDGPUDAGToDAGISel::SelectVOP3PMods0(SDValue In, SDValue &Src, + SDValue &SrcMods, + SDValue &Clamp) const { + SDLoc SL(In); + + // FIXME: Handle clamp and op_sel + Clamp = CurDAG->getTargetConstant(0, SL, MVT::i32); + + return SelectVOP3PMods(In, Src, SrcMods); +} + void AMDGPUDAGToDAGISel::PostprocessISelDAG() { const AMDGPUTargetLowering& Lowering = *static_cast(getTargetLowering()); diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index f28afa89bd2..edaab0063da 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -644,12 +644,17 @@ bool AMDGPUTargetLowering::isCheapToSpeculateCtlz() const { bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const { assert(VT.isFloatingPoint()); - return VT == MVT::f32 || VT == MVT::f64 || (Subtarget->has16BitInsts() && - VT == MVT::f16); + + // Packed operations do not have a fabs modifier. + return VT == MVT::f32 || VT == MVT::f64 || + (Subtarget->has16BitInsts() && VT == MVT::f16); } bool AMDGPUTargetLowering::isFNegFree(EVT VT) const { - return isFAbsFree(VT); + assert(VT.isFloatingPoint()); + return VT == MVT::f32 || VT == MVT::f64 || + (Subtarget->has16BitInsts() && VT == MVT::f16) || + (Subtarget->hasVOP3PInsts() && VT == MVT::v2f16); } bool AMDGPUTargetLowering:: storeOfVectorConstantIsCheap(EVT MemVT, diff --git a/lib/Target/AMDGPU/AMDGPUInstructions.td b/lib/Target/AMDGPU/AMDGPUInstructions.td index d0c62877524..ba2aed68fb8 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructions.td +++ b/lib/Target/AMDGPU/AMDGPUInstructions.td @@ -75,6 +75,12 @@ def brtarget : Operand; // Misc. PatFrags //===----------------------------------------------------------------------===// +class HasOneUseUnaryOp : PatFrag< + (ops node:$src0), + (op $src0), + [{ return N->hasOneUse(); }] +>; + class HasOneUseBinOp : PatFrag< (ops node:$src0, node:$src1), (op $src0, $src1), @@ -87,6 +93,7 @@ class HasOneUseTernaryOp : PatFrag< [{ return N->hasOneUse(); }] >; +def trunc_oneuse : HasOneUseUnaryOp; let Properties = [SDNPCommutative, SDNPAssociative] in { def smax_oneuse : HasOneUseBinOp; @@ -101,6 +108,8 @@ def xor_oneuse : HasOneUseBinOp; } // Properties = [SDNPCommutative, SDNPAssociative] def sub_oneuse : HasOneUseBinOp; + +def srl_oneuse : HasOneUseBinOp; def shl_oneuse : HasOneUseBinOp; def select_oneuse : HasOneUseTernaryOp