From: Konstantin Zhuravlyov Date: Fri, 21 Oct 2016 22:10:03 +0000 (+0000) Subject: [AMDGPU] Perform uchar to float combine for ISD::SINT_TO_FP X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=25a30cdacbe392d02a35e7ec2a7a249908b59c42;p=llvm [AMDGPU] Perform uchar to float combine for ISD::SINT_TO_FP This will prevent the following regression when enabling i16 support (D18049): test/CodeGen/AMDGPU/cvt_f32_ubyte.ll Differential Revision: https://reviews.llvm.org/D25805 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@284891 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 2e9d90e30b7..fe7f2922929 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -233,6 +233,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM, setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::XOR); + setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::UINT_TO_FP); setTargetDAGCombine(ISD::FCANONICALIZE); @@ -3520,19 +3521,27 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, case AMDGPUISD::CVT_F32_UBYTE2: case AMDGPUISD::CVT_F32_UBYTE3: { unsigned Offset = N->getOpcode() - AMDGPUISD::CVT_F32_UBYTE0; + SDValue Src = N->getOperand(0); + SDValue Srl = N->getOperand(0); + if (Srl.getOpcode() == ISD::ZERO_EXTEND) + Srl = Srl.getOperand(0); // TODO: Handle (or x, (srl y, 8)) pattern when known bits are zero. 
- if (Src.getOpcode() == ISD::SRL) { + if (Srl.getOpcode() == ISD::SRL) { // cvt_f32_ubyte0 (srl x, 16) -> cvt_f32_ubyte2 x // cvt_f32_ubyte1 (srl x, 16) -> cvt_f32_ubyte3 x // cvt_f32_ubyte0 (srl x, 8) -> cvt_f32_ubyte1 x - if (const ConstantSDNode *C = dyn_cast(Src.getOperand(1))) { + if (const ConstantSDNode *C = + dyn_cast(Srl.getOperand(1))) { + Srl = DAG.getZExtOrTrunc(Srl.getOperand(0), SDLoc(Srl.getOperand(0)), + EVT(MVT::i32)); + unsigned SrcOffset = C->getZExtValue() + 8 * Offset; if (SrcOffset < 32 && SrcOffset % 8 == 0) { return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0 + SrcOffset / 8, DL, - MVT::f32, Src.getOperand(0)); + MVT::f32, Srl); } } } @@ -3550,7 +3559,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, break; } - + case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: { return performUCharToFloatCombine(N, DCI); }