From: Craig Topper Date: Sun, 8 Jan 2017 21:32:30 +0000 (+0000) Subject: [AVX-512] If avx512dq is available use vpmovm2d/vpmovm2q instead of vselect of zeroes... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=c489a869ef810a279a4ee421da09bb8eedd397cd;p=llvm [AVX-512] If avx512dq is available use vpmovm2d/vpmovm2q instead of vselect of zeroes/ones when handling sign extends of i1 without VLX. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@291402 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 4ccdece23c3..b41d9f45490 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -17206,22 +17206,26 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op, if (NumElts != 8 && NumElts != 16 && !Subtarget.hasBWI()) return SDValue(); - if (VT.is512BitVector() && InVT.getVectorElementType() != MVT::i1) { + if (VT.is512BitVector() && InVTElt != MVT::i1) { if (In.getOpcode() == X86ISD::VSEXT || In.getOpcode() == X86ISD::VZEXT) return DAG.getNode(In.getOpcode(), dl, VT, In.getOperand(0)); return DAG.getNode(X86ISD::VSEXT, dl, VT, In); } - assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type"); + assert (InVTElt == MVT::i1 && "Unexpected vector type"); MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts); - SDValue NegOne = DAG.getConstant( - APInt::getAllOnesValue(ExtVT.getScalarSizeInBits()), dl, ExtVT); - SDValue Zero = DAG.getConstant( - APInt::getNullValue(ExtVT.getScalarSizeInBits()), dl, ExtVT); + SDValue V; + if (Subtarget.hasDQI()) { + V = DAG.getNode(X86ISD::VSEXT, dl, ExtVT, In); + assert(!VT.is512BitVector() && "Unexpected vector type"); + } else { + SDValue NegOne = getOnesVector(ExtVT, Subtarget, DAG, dl); + SDValue Zero = getZeroVector(ExtVT, Subtarget, DAG, dl); + V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero); + if (VT.is512BitVector()) + return V; + } - SDValue V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero); - if (VT.is512BitVector()) - return V; return DAG.getNode(X86ISD::VTRUNC, dl, VT, V); } diff --git a/test/CodeGen/X86/vector-compare-results.ll b/test/CodeGen/X86/vector-compare-results.ll index 17269e15dc5..42e47f3ef12 100644 --- a/test/CodeGen/X86/vector-compare-results.ll +++ b/test/CodeGen/X86/vector-compare-results.ll @@ -605,13 +605,28 @@ define <8 x i1> @test_cmp_v8f64(<8 x double> %a0, <8 x double> %a1) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_cmp_v8f64: -; AVX512: # BB#0: -; AVX512-NEXT: vcmpltpd %zmm0, %zmm1, %k1 -; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} -; AVX512-NEXT: vpmovqw %zmm0, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: test_cmp_v8f64: +; AVX512F: # BB#0: +; AVX512F-NEXT: vcmpltpd %zmm0, %zmm1, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 +; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: test_cmp_v8f64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k0 +; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0 +; AVX512DQ-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: test_cmp_v8f64: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: vcmpltpd %zmm0, %zmm1, %k1 +; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 +; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512BW-NEXT: retq %1 = fcmp ogt <8 x double> %a0, %a1 ret <8 x i1> %1 } @@ -652,13 +667,28 @@ define <16 x i1> @test_cmp_v16f32(<16 x float> %a0, <16 x float> %a1) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_cmp_v16f32: -; AVX512: # BB#0: -; AVX512-NEXT: vcmpltps %zmm0, %zmm1, %k1 -; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: test_cmp_v16f32: +; AVX512F: # BB#0: +; AVX512F-NEXT: vcmpltps %zmm0, %zmm1, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 +; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: test_cmp_v16f32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm1, %k0 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: test_cmp_v16f32: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: vcmpltps %zmm0, %zmm1, %k1 +; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 +; AVX512BW-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512BW-NEXT: retq %1 = fcmp ogt <16 x float> %a0, %a1 ret <16 x i1> %1 } @@ -750,13 +780,28 @@ define <8 x i1> @test_cmp_v8i64(<8 x i64> %a0, <8 x i64> %a1) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_cmp_v8i64: -; AVX512: # BB#0: -; AVX512-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 -; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} -; AVX512-NEXT: vpmovqw %zmm0, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: test_cmp_v8i64: +; AVX512F: # BB#0: +; AVX512F-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 +; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: test_cmp_v8i64: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; AVX512DQ-NEXT: vpmovm2q %k0, %zmm0 +; AVX512DQ-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: test_cmp_v8i64: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 +; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 +; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512BW-NEXT: retq %1 = icmp sgt <8 x i64> %a0, %a1 ret <8 x i1> %1 } @@ -800,13 +845,28 @@ define <16 x i1> @test_cmp_v16i32(<16 x i32> %a0, <16 x i32> %a1) nounwind { ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -; AVX512-LABEL: test_cmp_v16i32: -; AVX512: # BB#0: -; AVX512-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 -; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 -; AVX512-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} -; AVX512-NEXT: vpmovdb %zmm0, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: test_cmp_v16i32: +; AVX512F: # BB#0: +; AVX512F-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 +; AVX512F-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512DQ-LABEL: test_cmp_v16i32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0 +; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512DQ-NEXT: retq +; +; AVX512BW-LABEL: test_cmp_v16i32: +; AVX512BW: # BB#0: +; AVX512BW-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 +; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 +; AVX512BW-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z} +; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512BW-NEXT: retq %1 = icmp sgt <16 x i32> %a0, %a1 ret <16 x i1> %1 }