From: Craig Topper Date: Tue, 5 Dec 2017 01:45:46 +0000 (+0000) Subject: [X86] Use vector widening to support zero extend from i1 when the dest type is not... X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=eb94f55b0d0495d97b80889564ba787981417999;p=llvm [X86] Use vector widening to support zero extend from i1 when the dest type is not 512-bits and VLX is not enabled. Previously we used a wider element type and truncated. But it's more efficient to keep the element type and drop unused elements. If BWI isn't supported and we have an i16 or i8 type, we'll extend it to be i32 and still use a truncate. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319728 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 234af160178..eae3800f86b 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -16139,18 +16139,40 @@ static SDValue LowerZERO_EXTEND_AVX512(SDValue Op, if (InVT.getVectorElementType() != MVT::i1) return SDValue(); - // Extend VT if the target is 256 or 128bit vector and VLX is not supported. + // Extend VT if the scalar type is v8/v16 and BWI is not supported. MVT ExtVT = VT; - if (!VT.is512BitVector() && !Subtarget.hasVLX()) - ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts); + if (!Subtarget.hasBWI() && + (VT.getVectorElementType().getSizeInBits() <= 16)) + ExtVT = MVT::getVectorVT(MVT::i32, NumElts); + + // Widen to 512-bits if VLX is not supported. 
+ MVT WideVT = ExtVT; + if (!VT.is512BitVector() && !Subtarget.hasVLX()) { + NumElts *= 512 / ExtVT.getSizeInBits(); + InVT = MVT::getVectorVT(MVT::i1, NumElts); + In = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT, DAG.getUNDEF(InVT), + In, DAG.getIntPtrConstant(0, DL)); + WideVT = MVT::getVectorVT(ExtVT.getVectorElementType(), + NumElts); + } + + SDValue One = DAG.getConstant(1, DL, WideVT); + SDValue Zero = getZeroVector(WideVT, Subtarget, DAG, DL); - SDValue One = DAG.getConstant(1, DL, ExtVT); - SDValue Zero = getZeroVector(ExtVT, Subtarget, DAG, DL); + SDValue SelectedVal = DAG.getSelect(DL, WideVT, In, One, Zero); + + // Truncate if we had to extend i16/i8 above. + if (VT != ExtVT) { + WideVT = MVT::getVectorVT(VT.getVectorElementType(), NumElts); + SelectedVal = DAG.getNode(X86ISD::VTRUNC, DL, WideVT, SelectedVal); + } + + // Extract back to 128/256-bit if we widened. + if (WideVT != VT) + SelectedVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SelectedVal, + DAG.getIntPtrConstant(0, DL)); - SDValue SelectedVal = DAG.getSelect(DL, ExtVT, In, One, Zero); - if (VT == ExtVT) - return SelectedVal; - return DAG.getNode(X86ISD::VTRUNC, DL, VT, SelectedVal); + return SelectedVal; } static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget &Subtarget, diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll index 0487b560729..199d9143d9f 100644 --- a/test/CodeGen/X86/avx512-cvt.ll +++ b/test/CodeGen/X86/avx512-cvt.ll @@ -1862,14 +1862,12 @@ define <16 x double> @ubto16f64(<16 x i32> %a) { ; NOVL: # %bb.0: ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 -; NOVL-NEXT: movq {{.*}}(%rip), %rax -; NOVL-NEXT: vpbroadcastq %rax, %zmm0 {%k1} {z} -; NOVL-NEXT: vpmovqd %zmm0, %ymm0 -; NOVL-NEXT: vcvtudq2pd %ymm0, %zmm0 +; NOVL-NEXT: movl {{.*}}(%rip), %eax +; NOVL-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z} +; NOVL-NEXT: vcvtdq2pd %ymm0, %zmm0 ; NOVL-NEXT: kshiftrw $8, %k1, %k1 -; NOVL-NEXT: vpbroadcastq %rax, %zmm1 {%k1} 
{z} -; NOVL-NEXT: vpmovqd %zmm1, %ymm1 -; NOVL-NEXT: vcvtudq2pd %ymm1, %zmm1 +; NOVL-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z} +; NOVL-NEXT: vcvtdq2pd %ymm1, %zmm1 ; NOVL-NEXT: retq ; ; VL-LABEL: ubto16f64: @@ -1894,10 +1892,8 @@ define <8 x float> @ubto8f32(<8 x i32> %a) { ; NOVL-NEXT: # kill: %ymm0 %ymm0 %zmm0 ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 -; NOVL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} -; NOVL-NEXT: vpmovqd %zmm0, %ymm0 -; NOVL-NEXT: vcvtudq2ps %zmm0, %zmm0 -; NOVL-NEXT: # kill: %ymm0 %ymm0 %zmm0 +; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} +; NOVL-NEXT: vcvtdq2ps %ymm0, %ymm0 ; NOVL-NEXT: retq ; ; VL-LABEL: ubto8f32: @@ -1918,9 +1914,8 @@ define <8 x double> @ubto8f64(<8 x i32> %a) { ; NOVL-NEXT: # kill: %ymm0 %ymm0 %zmm0 ; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1 -; NOVL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} -; NOVL-NEXT: vpmovqd %zmm0, %ymm0 -; NOVL-NEXT: vcvtudq2pd %ymm0, %zmm0 +; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} +; NOVL-NEXT: vcvtdq2pd %ymm0, %zmm0 ; NOVL-NEXT: retq ; ; VL-LABEL: ubto8f64: diff --git a/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll b/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll index e0dd7f253e8..360ee6b15be 100644 --- a/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll +++ b/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll @@ -109,9 +109,8 @@ define <4 x i32> @ext_i4_4i32(i4 %a0) { ; AVX512F-NEXT: movb %dil, -{{[0-9]+}}(%rsp) ; AVX512F-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; AVX512F-NEXT: kmovw %eax, %k1 -; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} -; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 -; AVX512F-NEXT: # kill: %xmm0 %xmm0 %ymm0 +; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} +; AVX512F-NEXT: # kill: %xmm0 %xmm0 %zmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -166,8 +165,9 @@ define <8 x i16> @ext_i8_8i16(i8 %a0) { ; AVX512F-LABEL: ext_i8_8i16: ; 
AVX512F: # %bb.0: ; AVX512F-NEXT: kmovw %edi, %k1 -; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} -; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} +; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512F-NEXT: # kill: %xmm0 %xmm0 %ymm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -372,8 +372,8 @@ define <8 x i32> @ext_i8_8i32(i8 %a0) { ; AVX512F-LABEL: ext_i8_8i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: kmovw %edi, %k1 -; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} -; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} +; AVX512F-NEXT: # kill: %ymm0 %ymm0 %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VLBW-LABEL: ext_i8_8i32: