From 5d31f856ab29b56a4aa017571a2fd8f7848d22d1 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 24 Nov 2016 14:46:55 +0000 Subject: [PATCH] [X86][AVX512] Add support for v2i64 fptosi/fptoui/sitofp/uitofp on AVX512DQ-only targets Use 512-bit instructions with subvector insertion/extraction like we do in a number of similar circumstances git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@287882 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 10 +++--- lib/Target/X86/X86InstrAVX512.td | 20 ++++++++++++ lib/Target/X86/X86TargetTransformInfo.cpp | 4 +++ test/Analysis/CostModel/X86/fptosi.ll | 11 ++++--- test/Analysis/CostModel/X86/fptoui.ll | 9 ++++-- test/Analysis/CostModel/X86/sitofp.ll | 4 +-- test/CodeGen/X86/vec_fp_to_int.ll | 18 ++++------- test/CodeGen/X86/vec_int_to_fp.ll | 34 ++++++++------------- test/Transforms/SLPVectorizer/X86/sitofp.ll | 31 ++++++++++++++----- 9 files changed, 85 insertions(+), 56 deletions(-) diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 458d21e52ac..882df8efb10 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1251,18 +1251,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, if (Subtarget.hasDQI()) { setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal); setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal); setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v4i64, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v4i64, Legal); - if (Subtarget.hasVLX()) { - setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); - setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); + setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); + if (Subtarget.hasVLX()) { // Fast v2f32 SINT_TO_FP( v2i32 ) custom conversion. setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom); } diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 2793df3e584..92bb27f249a 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -6581,6 +6581,11 @@ let Predicates = [HasDQI, HasVLX] in { } let Predicates = [HasDQI, NoVLX] in { +def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))), + (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr + (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), + VR128X:$src1, sub_xmm)))), sub_xmm)>; + def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))), (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr (v8f32 (INSERT_SUBREG (IMPLICIT_DEF), @@ -6591,6 +6596,11 @@ def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))), (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>; +def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))), + (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr + (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), + VR128X:$src1, sub_xmm)))), sub_xmm)>; + def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))), (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr (v8f32 (INSERT_SUBREG (IMPLICIT_DEF), @@ -6606,6 +6616,11 @@ def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))), (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_xmm)>; +def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))), + (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr + (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), + VR128X:$src1, sub_xmm)))), sub_xmm)>; + def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))), (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), @@ -6616,6 +6631,11 @@ def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))), (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_xmm)>; +def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))), + (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr + (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), + VR128X:$src1, sub_xmm)))), sub_xmm)>; + def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))), (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 6b6ac840fa5..f9d1217dbbe 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -716,6 +716,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { // potential massive combinations (elem_num x src_type x dst_type). static const TypeConversionCostTblEntry AVX512DQConversionTbl[] = { + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 }, { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 }, { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 }, @@ -728,8 +730,10 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 }, { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 1 }, + { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 1 }, { ISD::FP_TO_SINT, MVT::v4i64, MVT::v4f32, 1 }, { ISD::FP_TO_SINT, MVT::v8i64, MVT::v8f32, 1 }, + { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 }, { ISD::FP_TO_SINT, MVT::v4i64, MVT::v4f64, 1 }, { ISD::FP_TO_SINT, MVT::v8i64, MVT::v8f64, 1 }, diff --git a/test/Analysis/CostModel/X86/fptosi.ll b/test/Analysis/CostModel/X86/fptosi.ll index 6a4bc3c21ea..d5e21f8685a 100644 --- a/test/Analysis/CostModel/X86/fptosi.ll +++ b/test/Analysis/CostModel/X86/fptosi.ll @@ -17,7 +17,8 @@ define i32 @fptosi_double_i64(i32 %arg) { ; SSE42: cost of 6 {{.*}} %V2I64 = fptosi ; AVX1: cost of 6 {{.*}} %V2I64 = fptosi ; AVX2: cost of 6 {{.*}} %V2I64 = fptosi - ; AVX512: cost of 6 {{.*}} %V2I64 = fptosi + ; AVX512F: cost of 6 {{.*}} %V2I64 = fptosi + ; AVX512DQ: cost of 1 {{.*}} %V2I64 = fptosi %V2I64 = fptosi <2 x double> undef to <2 x i64> ; SSE2: cost of 13 {{.*}} %V4I64 = fptosi ; SSE42: cost of 13 {{.*}} %V4I64 = fptosi @@ -79,7 +80,8 @@ define i32 @fptosi_double_i16(i32 %arg) { ; SSE42: cost of 6 {{.*}} %V2I16 = fptosi ; AVX1: cost of 6 {{.*}} %V2I16 = fptosi ; AVX2: cost of 6 {{.*}} %V2I16 = fptosi - ; AVX512: cost of 6 {{.*}} %V2I16 = fptosi + ; AVX512F: cost of 6 {{.*}} %V2I16 = fptosi + ; AVX512DQ: cost of 1 {{.*}} %V2I16 = fptosi %V2I16 = fptosi <2 x double> undef to <2 x i16> ; SSE2: cost of 13 {{.*}} %V4I16 = fptosi ; SSE42: cost of 13 {{.*}} %V4I16 = fptosi @@ -109,7 +111,8 @@ define i32 @fptosi_double_i8(i32 %arg) { ; SSE42: cost of 6 {{.*}} %V2I8 = fptosi ; AVX1: cost of 6 {{.*}} %V2I8 = fptosi ; AVX2: cost of 6 {{.*}} %V2I8 = fptosi - ; AVX512: cost of 6 {{.*}} %V2I8 = fptosi + ; AVX512F: cost of 6 {{.*}} %V2I8 = fptosi + ; AVX512DQ: cost of 1 {{.*}} %V2I8 = fptosi %V2I8 = fptosi <2 x double> undef to <2 x i8> ; SSE2: cost of 13 {{.*}} %V4I8 = fptosi ; SSE42: cost of 13 {{.*}} %V4I8 = fptosi @@ -140,7 +143,7 @@ define i32 @fptosi_float_i64(i32 %arg) { ; AVX1: cost of 6 {{.*}} %V2I64 = fptosi ; AVX2: cost of 6 {{.*}} %V2I64 = fptosi ; AVX512F: cost of 6 {{.*}} %V2I64 = fptosi - ; AVX512DQ: cost of 6 {{.*}} %V2I64 = fptosi + ; AVX512DQ: cost of 1 {{.*}} %V2I64 = fptosi %V2I64 = fptosi <2 x float> undef to <2 x i64> ; SSE2: cost of 13 {{.*}} %V4I64 = fptosi ; SSE42: cost of 13 {{.*}} %V4I64 = fptosi diff --git a/test/Analysis/CostModel/X86/fptoui.ll b/test/Analysis/CostModel/X86/fptoui.ll index f45dd73cd06..dbdba30357d 100644 --- a/test/Analysis/CostModel/X86/fptoui.ll +++ b/test/Analysis/CostModel/X86/fptoui.ll @@ -50,7 +50,8 @@ define i32 @fptoui_double_i32(i32 %arg) { ; SSE42: cost of 6 {{.*}} %V2I32 = fptoui ; AVX1: cost of 6 {{.*}} %V2I32 = fptoui ; AVX2: cost of 6 {{.*}} %V2I32 = fptoui - ; AVX512: cost of 6 {{.*}} %V2I32 = fptoui + ; AVX512F: cost of 6 {{.*}} %V2I32 = fptoui + ; AVX512DQ: cost of 1 {{.*}} %V2I32 = fptoui %V2I32 = fptoui <2 x double> undef to <2 x i32> ; SSE2: cost of 13 {{.*}} %V4I32 = fptoui ; SSE42: cost of 13 {{.*}} %V4I32 = fptoui @@ -80,7 +81,8 @@ define i32 @fptoui_double_i16(i32 %arg) { ; SSE42: cost of 6 {{.*}} %V2I16 = fptoui ; AVX1: cost of 6 {{.*}} %V2I16 = fptoui ; AVX2: cost of 6 {{.*}} %V2I16 = fptoui - ; AVX512: cost of 6 {{.*}} %V2I16 = fptoui + ; AVX512F: cost of 6 {{.*}} %V2I16 = fptoui + ; AVX512DQ: cost of 1 {{.*}} %V2I16 = fptoui %V2I16 = fptoui <2 x double> undef to <2 x i16> ; SSE2: cost of 13 {{.*}} %V4I16 = fptoui ; SSE42: cost of 13 {{.*}} %V4I16 = fptoui @@ -110,7 +112,8 @@ define i32 @fptoui_double_i8(i32 %arg) { ; SSE42: cost of 6 {{.*}} %V2I8 = fptoui ; AVX1: cost of 6 {{.*}} %V2I8 = fptoui ; AVX2: cost of 6 {{.*}} %V2I8 = fptoui - ; AVX512: cost of 6 {{.*}} %V2I8 = fptoui + ; AVX512F: cost of 6 {{.*}} %V2I8 = fptoui + ; AVX512DQ: cost of 1 {{.*}} %V2I8 = fptoui %V2I8 = fptoui <2 x double> undef to <2 x i8> ; SSE2: cost of 13 {{.*}} %V4I8 = fptoui ; SSE42: cost of 13 {{.*}} %V4I8 = fptoui diff --git a/test/Analysis/CostModel/X86/sitofp.ll b/test/Analysis/CostModel/X86/sitofp.ll index 371666141d0..a30cb5f7e82 100644 --- a/test/Analysis/CostModel/X86/sitofp.ll +++ b/test/Analysis/CostModel/X86/sitofp.ll @@ -103,7 +103,7 @@ define i32 @sitofp_i64_double() { ; AVX1: cost of 20 {{.*}} sitofp <2 x i64> ; AVX2: cost of 20 {{.*}} sitofp <2 x i64> ; AVX512F: cost of 20 {{.*}} sitofp <2 x i64> - ; AVX512DQ: cost of 20 {{.*}} sitofp <2 x i64> + ; AVX512DQ: cost of 1 {{.*}} sitofp <2 x i64> %cvt_v2i64_v2f64 = sitofp <2 x i64> undef to <2 x double> ; SSE2: cost of 40 {{.*}} sitofp <4 x i64> @@ -222,7 +222,7 @@ define i32 @sitofp_i64_float() { ; AVX1: cost of 4 {{.*}} sitofp <2 x i64> ; AVX2: cost of 4 {{.*}} sitofp <2 x i64> ; AVX512F: cost of 4 {{.*}} sitofp <2 x i64> - ; AVX512DQ: cost of 4 {{.*}} sitofp <2 x i64> + ; AVX512DQ: cost of 1 {{.*}} sitofp <2 x i64> %cvt_v2i64_v2f32 = sitofp <2 x i64> undef to <2 x float> ; SSE2: cost of 30 {{.*}} sitofp <4 x i64> diff --git a/test/CodeGen/X86/vec_fp_to_int.ll b/test/CodeGen/X86/vec_fp_to_int.ll index e3c711d3a50..11cc4ce87e4 100644 --- a/test/CodeGen/X86/vec_fp_to_int.ll +++ b/test/CodeGen/X86/vec_fp_to_int.ll @@ -60,12 +60,9 @@ define <2 x i64> @fptosi_2f64_to_2i64(<2 x double> %a) { ; ; AVX512DQ-LABEL: fptosi_2f64_to_2i64: ; AVX512DQ: # BB#0: -; AVX512DQ-NEXT: vcvttsd2si %xmm0, %rax -; AVX512DQ-NEXT: vmovq %rax, %xmm1 -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512DQ-NEXT: vcvttsd2si %xmm0, %rax -; AVX512DQ-NEXT: vmovq %rax, %xmm0 -; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %ZMM0 ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: fptosi_2f64_to_2i64: @@ -334,12 +331,9 @@ define <2 x i64> @fptoui_2f64_to_2i64(<2 x double> %a) { ; ; AVX512DQ-LABEL: fptoui_2f64_to_2i64: ; AVX512DQ: # BB#0: -; AVX512DQ-NEXT: vcvttsd2usi %xmm0, %rax -; AVX512DQ-NEXT: vmovq %rax, %xmm1 -; AVX512DQ-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; AVX512DQ-NEXT: vcvttsd2usi %xmm0, %rax -; AVX512DQ-NEXT: vmovq %rax, %xmm0 -; AVX512DQ-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512DQ-NEXT: vcvttpd2uqq %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %ZMM0 ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: fptoui_2f64_to_2i64: diff --git a/test/CodeGen/X86/vec_int_to_fp.ll b/test/CodeGen/X86/vec_int_to_fp.ll index 13cdc48d5d9..8876340e83f 100644 --- a/test/CodeGen/X86/vec_int_to_fp.ll +++ b/test/CodeGen/X86/vec_int_to_fp.ll @@ -58,11 +58,9 @@ define <2 x double> @sitofp_2i64_to_2f64(<2 x i64> %a) { ; ; AVX512DQ-LABEL: sitofp_2i64_to_2f64: ; AVX512DQ: # BB#0: -; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax -; AVX512DQ-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1 -; AVX512DQ-NEXT: vmovq %xmm0, %rax -; AVX512DQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0 -; AVX512DQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %ZMM0 ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: sitofp_2i64_to_2f64: @@ -497,11 +495,9 @@ define <2 x double> @uitofp_2i64_to_2f64(<2 x i64> %a) { ; ; AVX512DQ-LABEL: uitofp_2i64_to_2f64: ; AVX512DQ: # BB#0: -; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax -; AVX512DQ-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1 -; AVX512DQ-NEXT: vmovq %xmm0, %rax -; AVX512DQ-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0 -; AVX512DQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %ZMM0 ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: uitofp_2i64_to_2f64: @@ -2643,12 +2639,9 @@ define <2 x double> @sitofp_load_2i64_to_2f64(<2 x i64> *%a) { ; ; AVX512DQ-LABEL: sitofp_load_2i64_to_2f64: ; AVX512DQ: # BB#0: -; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax -; AVX512DQ-NEXT: vcvtsi2sdq %rax, %xmm1, %xmm1 -; AVX512DQ-NEXT: vmovq %xmm0, %rax -; AVX512DQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm0 -; AVX512DQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0 +; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %ZMM0 ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: sitofp_load_2i64_to_2f64: @@ -2965,12 +2958,9 @@ define <2 x double> @uitofp_load_2i64_to_2f64(<2 x i64> *%a) { ; ; AVX512DQ-LABEL: uitofp_load_2i64_to_2f64: ; AVX512DQ: # BB#0: -; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0 -; AVX512DQ-NEXT: vpextrq $1, %xmm0, %rax -; AVX512DQ-NEXT: vcvtusi2sdq %rax, %xmm1, %xmm1 -; AVX512DQ-NEXT: vmovq %xmm0, %rax -; AVX512DQ-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm0 -; AVX512DQ-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512DQ-NEXT: vmovaps (%rdi), %xmm0 +; AVX512DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0 +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %ZMM0 ; AVX512DQ-NEXT: retq ; ; AVX512VLDQ-LABEL: uitofp_load_2i64_to_2f64: diff --git a/test/Transforms/SLPVectorizer/X86/sitofp.ll b/test/Transforms/SLPVectorizer/X86/sitofp.ll index 3d472bb2c20..6e91a2181cc 100644 --- a/test/Transforms/SLPVectorizer/X86/sitofp.ll +++ b/test/Transforms/SLPVectorizer/X86/sitofp.ll @@ -20,14 +20,29 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" ; define void @sitofp_2i64_2f64() #0 { -; CHECK-LABEL: @sitofp_2i64_2f64( -; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 -; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 -; CHECK-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double -; CHECK-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double -; CHECK-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 -; CHECK-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 -; CHECK-NEXT: ret void +; SSE-LABEL: @sitofp_2i64_2f64( +; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 +; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 +; SSE-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double +; SSE-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double +; SSE-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; SSE-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 +; SSE-NEXT: ret void +; +; AVX256-LABEL: @sitofp_2i64_2f64( +; AVX256-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 +; AVX256-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 +; AVX256-NEXT: [[CVT0:%.*]] = sitofp i64 [[LD0]] to double +; AVX256-NEXT: [[CVT1:%.*]] = sitofp i64 [[LD1]] to double +; AVX256-NEXT: store double [[CVT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 64 +; AVX256-NEXT: store double [[CVT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8 +; AVX256-NEXT: ret void +; +; AVX512-LABEL: @sitofp_2i64_2f64( +; AVX512-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([8 x i64]* @src64 to <2 x i64>*), align 64 +; AVX512-NEXT: [[TMP2:%.*]] = sitofp <2 x i64> [[TMP1]] to <2 x double> +; AVX512-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 64 +; AVX512-NEXT: ret void ; %ld0 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 0), align 64 %ld1 = load i64, i64* getelementptr inbounds ([8 x i64], [8 x i64]* @src64, i32 0, i64 1), align 8 -- 2.50.1