From: Simon Pilgrim Date: Wed, 8 May 2019 15:49:10 +0000 (+0000) Subject: [AMDGPU] Reapplied BFE canonicalization from D60462 X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=bba36ee9e9211eb9986466a65abcf35691180c01;p=llvm [AMDGPU] Reapplied BFE canonicalization from D60462 This was committed in rL358887 but reverted in rL360066 due to a x86 regression, really it should be have been pre-committed instead of being part of the SimplifyDemandedBits bitcast patch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@360263 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 994e912ac9c..409fbfa22f3 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -3202,30 +3202,44 @@ SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N, SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const { - if (N->getValueType(0) != MVT::i64) - return SDValue(); - - const ConstantSDNode *RHS = dyn_cast(N->getOperand(1)); + auto *RHS = dyn_cast(N->getOperand(1)); if (!RHS) return SDValue(); + EVT VT = N->getValueType(0); + SDValue LHS = N->getOperand(0); unsigned ShiftAmt = RHS->getZExtValue(); + SelectionDAG &DAG = DCI.DAG; + SDLoc SL(N); + + // fold (srl (and x, c1 << c2), c2) -> (and (srl(x, c2), c1) + // this improves the ability to match BFE patterns in isel. + if (LHS.getOpcode() == ISD::AND) { + if (auto *Mask = dyn_cast(LHS.getOperand(1))) { + if (Mask->getAPIntValue().isShiftedMask() && + Mask->getAPIntValue().countTrailingZeros() == ShiftAmt) { + return DAG.getNode( + ISD::AND, SL, VT, + DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(0), N->getOperand(1)), + DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(1), N->getOperand(1))); + } + } + } + + if (VT != MVT::i64) + return SDValue(); + if (ShiftAmt < 32) return SDValue(); // srl i64:x, C for C >= 32 // => // build_pair (srl hi_32(x), C - 32), 0 - - SelectionDAG &DAG = DCI.DAG; - SDLoc SL(N); - SDValue One = DAG.getConstant(1, SL, MVT::i32); SDValue Zero = DAG.getConstant(0, SL, MVT::i32); - SDValue VecOp = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, N->getOperand(0)); - SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, - VecOp, One); + SDValue VecOp = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, LHS); + SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecOp, One); SDValue NewConst = DAG.getConstant(ShiftAmt - 32, SL, MVT::i32); SDValue NewShift = DAG.getNode(ISD::SRL, SL, MVT::i32, Hi, NewConst); diff --git a/test/CodeGen/AMDGPU/store-weird-sizes.ll b/test/CodeGen/AMDGPU/store-weird-sizes.ll index 13380e03e32..f5857a330fa 100644 --- a/test/CodeGen/AMDGPU/store-weird-sizes.ll +++ b/test/CodeGen/AMDGPU/store-weird-sizes.ll @@ -86,8 +86,8 @@ define amdgpu_kernel void @local_store_i55(i55 addrspace(3)* %ptr, i55 %arg) #0 ; GFX9-NEXT: v_mov_b32_e32 v2, s2 ; GFX9-NEXT: ds_write_b16 v1, v2 offset:4 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_and_b32_e32 v0, 0x7f0000, v0 -; GFX9-NEXT: ds_write_b8_d16_hi v1, v0 offset:6 +; GFX9-NEXT: v_bfe_u32 v0, v0, 16, 7 +; GFX9-NEXT: ds_write_b8 v1, v0 offset:6 ; GFX9-NEXT: ds_write_b32 v1, v3 ; GFX9-NEXT: s_endpgm store i55 %arg, i55 addrspace(3)* %ptr, align 8