From: Simon Pilgrim Date: Sun, 26 May 2019 10:54:23 +0000 (+0000) Subject: [X86][AVX] combineBitcastvxi1 - peek through bitops to determine size of original... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=7cf3718080b714f39d202520671a338b9508b53c;p=llvm [X86][AVX] combineBitcastvxi1 - peek through bitops to determine size of original vector We were only testing for direct SETCC results - this allows us to peek through AND/OR/XOR combinations of the comparison results as well. There's a missing SEXT(PACKSS) fold that I need to investigate for v8i1 cases before I can enable it there as well. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@361716 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 170e3cf33ba..8b6edaa50ba 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -34126,6 +34126,21 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG, EltNo); } +// Helper to peek through bitops/setcc to determine size of source vector. +// Allows combineBitcastvxi1 to determine what size vector generated a <X x i1>. +static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size) { + switch (Src.getOpcode()) { + case ISD::SETCC: + return Src.getOperand(0).getValueSizeInBits() == Size; + case ISD::AND: + case ISD::XOR: + case ISD::OR: + return checkBitcastSrcVectorSize(Src.getOperand(0), Size) && + checkBitcastSrcVectorSize(Src.getOperand(1), Size); + } + return false; +} + // Try to match patterns such as // (i16 bitcast (v16i1 x)) // -> @@ -34174,10 +34189,8 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src, SExtVT = MVT::v4i32; // For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2)) // sign-extend to a 256-bit operation to avoid truncation. 
- if (Src.getOpcode() == ISD::SETCC && Subtarget.hasAVX() && - Src.getOperand(0).getValueType().is256BitVector()) { + if (Subtarget.hasAVX() && checkBitcastSrcVectorSize(Src, 256)) SExtVT = MVT::v4i64; - } break; case MVT::v8i1: SExtVT = MVT::v8i16; @@ -34186,6 +34199,7 @@ static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src, // If the setcc operand is 128-bit, prefer sign-extending to 128-bit over // 256-bit because the shuffle is cheaper than sign extending the result of // the compare. + // TODO : use checkBitcastSrcVectorSize if (Src.getOpcode() == ISD::SETCC && Subtarget.hasAVX() && (Src.getOperand(0).getValueType().is256BitVector() || Src.getOperand(0).getValueType().is512BitVector())) { diff --git a/test/CodeGen/X86/bitcast-and-setcc-256.ll b/test/CodeGen/X86/bitcast-and-setcc-256.ll index 85ae7c0c421..b982cde2a95 100644 --- a/test/CodeGen/X86/bitcast-and-setcc-256.ll +++ b/test/CodeGen/X86/bitcast-and-setcc-256.ll @@ -55,18 +55,18 @@ define i4 @v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) { ; ; AVX1-LABEL: v4i64: ; AVX1: # %bb.0: -; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm4 -; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1 -; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm5 +; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4 ; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm1 -; AVX1-NEXT: vpand %xmm1, %xmm4, %xmm1 -; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm3 -; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4 +; AVX1-NEXT: vpcmpgtq %xmm1, %xmm4, %xmm1 ; AVX1-NEXT: vpcmpgtq %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 -; AVX1-NEXT: vpackssdw %xmm0, %xmm1, %xmm0 -; AVX1-NEXT: vmovmskps %xmm0, %eax +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vandpd %ymm1, %ymm0, %ymm0 
+; AVX1-NEXT: vmovmskpd %ymm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq @@ -76,9 +76,7 @@ define i4 @v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c, <4 x i64> %d) { ; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; AVX2-NEXT: vpcmpgtq %ymm3, %ymm2, %ymm1 ; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 -; AVX2-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX2-NEXT: vmovmskps %xmm0, %eax +; AVX2-NEXT: vmovmskpd %ymm0, %eax ; AVX2-NEXT: # kill: def $al killed $al killed $eax ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -126,9 +124,7 @@ define i4 @v4f64(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> ; AVX12-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0 ; AVX12-NEXT: vcmpltpd %ymm2, %ymm3, %ymm1 ; AVX12-NEXT: vandpd %ymm1, %ymm0, %ymm0 -; AVX12-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX12-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX12-NEXT: vmovmskps %xmm0, %eax +; AVX12-NEXT: vmovmskpd %ymm0, %eax ; AVX12-NEXT: # kill: def $al killed $al killed $eax ; AVX12-NEXT: vzeroupper ; AVX12-NEXT: retq