From: Sanjay Patel Date: Fri, 22 Feb 2019 15:47:45 +0000 (+0000) Subject: [x86] allow narrowing of vector UINT_TO_FP X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=8ed2d5e37993fc0204b433e56d655b97e60a49a2;p=llvm [x86] allow narrowing of vector UINT_TO_FP As discussed in: D56864 D58197 Always use the narrow (128-bit) instruction when possible. We already had the signed int version of this transform. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@354675 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 704e15013ef..731e516efd3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -42016,6 +42016,11 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, InVec.getOperand(0).getValueType() == MVT::v4i32) { return DAG.getNode(X86ISD::CVTSI2P, SDLoc(N), VT, InVec.getOperand(0)); } + // v2f64 CVTUDQ2PD(v4i32). + if (InOpcode == ISD::UINT_TO_FP && + InVec.getOperand(0).getValueType() == MVT::v4i32) { + return DAG.getNode(X86ISD::CVTUI2P, SDLoc(N), VT, InVec.getOperand(0)); + } // v2f64 CVTPS2PD(v4f32). 
if (InOpcode == ISD::FP_EXTEND && InVec.getOperand(0).getValueType() == MVT::v4f32) { diff --git a/test/CodeGen/X86/vec_int_to_fp-widen.ll b/test/CodeGen/X86/vec_int_to_fp-widen.ll index 7441ce58027..41524f36d63 100644 --- a/test/CodeGen/X86/vec_int_to_fp-widen.ll +++ b/test/CodeGen/X86/vec_int_to_fp-widen.ll @@ -684,9 +684,7 @@ define <2 x double> @uitofp_4i32_to_2f64(<4 x i32> %a) { ; ; AVX512VL-LABEL: uitofp_4i32_to_2f64: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vcvtudq2pd %xmm0, %ymm0 -; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: uitofp_4i32_to_2f64: @@ -699,9 +697,7 @@ define <2 x double> @uitofp_4i32_to_2f64(<4 x i32> %a) { ; ; AVX512VLDQ-LABEL: uitofp_4i32_to_2f64: ; AVX512VLDQ: # %bb.0: -; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %ymm0 -; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; AVX512VLDQ-NEXT: vzeroupper +; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0 ; AVX512VLDQ-NEXT: retq %cvt = uitofp <4 x i32> %a to <4 x double> %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1> diff --git a/test/CodeGen/X86/vec_int_to_fp.ll b/test/CodeGen/X86/vec_int_to_fp.ll index ab3fd395d33..02b55f3ffc6 100644 --- a/test/CodeGen/X86/vec_int_to_fp.ll +++ b/test/CodeGen/X86/vec_int_to_fp.ll @@ -684,9 +684,7 @@ define <2 x double> @uitofp_4i32_to_2f64(<4 x i32> %a) { ; ; AVX512VL-LABEL: uitofp_4i32_to_2f64: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vcvtudq2pd %xmm0, %ymm0 -; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: uitofp_4i32_to_2f64: @@ -699,9 +697,7 @@ define <2 x double> @uitofp_4i32_to_2f64(<4 x i32> %a) { ; ; AVX512VLDQ-LABEL: uitofp_4i32_to_2f64: ; AVX512VLDQ: # %bb.0: -; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %ymm0 -; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; 
AVX512VLDQ-NEXT: vzeroupper +; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0 ; AVX512VLDQ-NEXT: retq %cvt = uitofp <4 x i32> %a to <4 x double> %shuf = shufflevector <4 x double> %cvt, <4 x double> undef, <2 x i32> <i32 0, i32 1> @@ -5699,9 +5695,7 @@ define double @extract0_uitofp_v4i32_f64(<4 x i32> %x) nounwind { ; ; AVX512VL-LABEL: extract0_uitofp_v4i32_f64: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vcvtudq2pd %xmm0, %ymm0 -; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: extract0_uitofp_v4i32_f64: @@ -5714,9 +5708,7 @@ define double @extract0_uitofp_v4i32_f64(<4 x i32> %x) nounwind { ; ; AVX512VLDQ-LABEL: extract0_uitofp_v4i32_f64: ; AVX512VLDQ: # %bb.0: -; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %ymm0 -; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; AVX512VLDQ-NEXT: vzeroupper +; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0 ; AVX512VLDQ-NEXT: retq %e = extractelement <4 x i32> %x, i32 0 %r = uitofp i32 %e to double @@ -5855,9 +5847,7 @@ define double @extract3_uitofp_v4i32_f64(<4 x i32> %x) nounwind { ; AVX512VL-LABEL: extract3_uitofp_v4i32_f64: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX512VL-NEXT: vcvtudq2pd %xmm0, %ymm0 -; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: vcvtudq2pd %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: extract3_uitofp_v4i32_f64: @@ -5871,9 +5861,7 @@ define double @extract3_uitofp_v4i32_f64(<4 x i32> %x) nounwind { ; AVX512VLDQ-LABEL: extract3_uitofp_v4i32_f64: ; AVX512VLDQ: # %bb.0: ; AVX512VLDQ-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %ymm0 -; AVX512VLDQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 -; AVX512VLDQ-NEXT: vzeroupper +; AVX512VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0 ; AVX512VLDQ-NEXT: retq %e = extractelement <4 x i32> %x, i32 3 %r = uitofp i32 %e to double