From 8383d4d295ee1f0d8a884fd5be2ee74218fb1cd6 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 13 Jul 2018 04:50:39 +0000 Subject: [PATCH] [X86] Remove isel patterns that turn packed add/sub/mul/div+movss/sd into scalar intrinsic instructions. This is not an optimization we should be doing in isel; it is more suitable for a DAG combine. My main concern is a future time when we support more of the FP environment (FPENV). Changing a packed op to a scalar op could cause us to miss some exceptions that should have occurred if we had done a packed op. A DAG combine would be better able to manage this. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@336971 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 36 +- lib/Target/X86/X86InstrSSE.td | 38 +- test/CodeGen/X86/sse-scalar-fp-arith.ll | 1104 ++++++++++++++++------- 3 files changed, 833 insertions(+), 345 deletions(-) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index c703e610715..3df4da13cd5 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -11481,37 +11481,37 @@ multiclass AVX512_scalar_math_fp_patterns { let Predicates = [HasAVX512] in { // extracted scalar math op with insert via movss - def : Pat<(_.VT (MoveNode (_.VT VR128X:$dst), (_.VT (scalar_to_vector - (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))), - _.FRC:$src))))), - (!cast("V"#OpcPrefix#Zrr_Int) _.VT:$dst, - (COPY_TO_REGCLASS _.FRC:$src, VR128X))>; - - // vector math op with insert via movss - def : Pat<(_.VT (MoveNode (_.VT VR128X:$dst), - (Op (_.VT VR128X:$dst), (_.VT VR128X:$src)))), - (!cast("V"#OpcPrefix#Zrr_Int) _.VT:$dst, _.VT:$src)>; + def : Pat<(MoveNode + (_.VT VR128X:$dst), + (_.VT (scalar_to_vector + (Op (_.EltVT (extractelt (_.VT VR128X:$dst), (iPTR 0))), + _.FRC:$src)))), + (!cast("V"#OpcPrefix#Zrr_Int) _.VT:$dst, + (COPY_TO_REGCLASS _.FRC:$src, VR128X))>; // extracted masked scalar math op with insert via movss def : Pat<(MoveNode (_.VT VR128X:$src1), (scalar_to_vector (X86selects VK1WM:$mask, - (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), + (Op (_.EltVT + (extractelt (_.VT VR128X:$src1), (iPTR 0))), _.FRC:$src2), _.FRC:$src0))), - (!cast("V"#OpcPrefix#Zrr_Intk) (COPY_TO_REGCLASS _.FRC:$src0, VR128X), - VK1WM:$mask, _.VT:$src1, - (COPY_TO_REGCLASS _.FRC:$src2, VR128X))>; + (!cast("V"#OpcPrefix#Zrr_Intk) + (COPY_TO_REGCLASS _.FRC:$src0, VR128X), + VK1WM:$mask, _.VT:$src1, + (COPY_TO_REGCLASS _.FRC:$src2, VR128X))>; // extracted masked scalar math op with insert via movss def : Pat<(MoveNode (_.VT VR128X:$src1), (scalar_to_vector (X86selects VK1WM:$mask, - (Op (_.EltVT (extractelt (_.VT VR128X:$src1), (iPTR 0))), + (Op (_.EltVT + (extractelt (_.VT VR128X:$src1), (iPTR 0))), _.FRC:$src2), (_.EltVT ZeroFP)))), - (!cast("V"#OpcPrefix#Zrr_Intkz) - VK1WM:$mask, _.VT:$src1, - (COPY_TO_REGCLASS _.FRC:$src2, VR128X))>; + (!cast("V"#OpcPrefix#Zrr_Intkz) + VK1WM:$mask, _.VT:$src1, + (COPY_TO_REGCLASS _.FRC:$src2, VR128X))>; } } diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b15ac4a378e..da8c2f8ad8f 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2642,34 +2642,26 @@ let isCodeGenOnly = 1 in { multiclass scalar_math_patterns { - let Predicates = [BasePredicate] in { + let Predicates = [BasePredicate] in { // extracted scalar math op with insert via movss/movsd - def : Pat<(VT (Move (VT VR128:$dst), (VT (scalar_to_vector - (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))), - RC:$src))))), -
(!cast(OpcPrefix#rr_Int) VT:$dst, - (COPY_TO_REGCLASS RC:$src, VR128))>; - - // vector math op with insert via movss/movsd def : Pat<(VT (Move (VT VR128:$dst), - (Op (VT VR128:$dst), (VT VR128:$src)))), - (!cast(OpcPrefix#rr_Int) VT:$dst, VT:$src)>; - } + (VT (scalar_to_vector + (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))), + RC:$src))))), + (!cast(OpcPrefix#rr_Int) VT:$dst, + (COPY_TO_REGCLASS RC:$src, VR128))>; + } - // Repeat for AVX versions of the instructions. - let Predicates = [UseAVX] in { + // Repeat for AVX versions of the instructions. + let Predicates = [UseAVX] in { // extracted scalar math op with insert via movss/movsd - def : Pat<(VT (Move (VT VR128:$dst), (VT (scalar_to_vector - (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))), - RC:$src))))), - (!cast("V"#OpcPrefix#rr_Int) VT:$dst, - (COPY_TO_REGCLASS RC:$src, VR128))>; - - // vector math op with insert via movss/movsd def : Pat<(VT (Move (VT VR128:$dst), - (Op (VT VR128:$dst), (VT VR128:$src)))), - (!cast("V"#OpcPrefix#rr_Int) VT:$dst, VT:$src)>; - } + (VT (scalar_to_vector + (Op (EltTy (extractelt (VT VR128:$dst), (iPTR 0))), + RC:$src))))), + (!cast("V"#OpcPrefix#rr_Int) VT:$dst, + (COPY_TO_REGCLASS RC:$src, VR128))>; + } } defm : scalar_math_patterns; diff --git a/test/CodeGen/X86/sse-scalar-fp-arith.ll b/test/CodeGen/X86/sse-scalar-fp-arith.ll index dab4c139529..0b675aa0b27 100644 --- a/test/CodeGen/X86/sse-scalar-fp-arith.ll +++ b/test/CodeGen/X86/sse-scalar-fp-arith.ll @@ -655,496 +655,992 @@ define <2 x double> @blend_div_sd(<2 x double> %a, double %b) { ; from a packed fp instruction plus a vector insert. define <4 x float> @insert_test_add_ss(<4 x float> %a, <4 x float> %b) { -; SSE-LABEL: insert_test_add_ss: -; SSE: # %bb.0: -; SSE-NEXT: addss %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test_add_ss: -; AVX: # %bb.0: -; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test_add_ss: +; SSE2: # %bb.0: +; SSE2-NEXT: addps %xmm0, %xmm1 +; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test_add_ss: +; SSE41: # %bb.0: +; SSE41-NEXT: addps %xmm0, %xmm1 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test_add_ss: +; AVX1: # %bb.0: +; AVX1-NEXT: vaddps %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test_add_ss: +; AVX512: # %bb.0: +; AVX512-NEXT: vaddps %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; AVX512-NEXT: ret{{[l|q]}} %1 = fadd <4 x float> %a, %b %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> ret <4 x float> %2 } define <4 x float> @insert_test_sub_ss(<4 x float> %a, <4 x float> %b) { -; SSE-LABEL: insert_test_sub_ss: -; SSE: # %bb.0: -; SSE-NEXT: subss %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test_sub_ss: -; AVX: # %bb.0: -; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test_sub_ss: +; SSE2: # %bb.0: +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: subps %xmm1, %xmm2 +; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3] +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test_sub_ss: +; SSE41: # %bb.0: +; SSE41-NEXT: movaps %xmm0, %xmm2 +; SSE41-NEXT: subps %xmm1, %xmm2 +; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3] +; SSE41-NEXT: movaps %xmm2, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test_sub_ss: +; AVX1: 
# %bb.0: +; AVX1-NEXT: vsubps %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test_sub_ss: +; AVX512: # %bb.0: +; AVX512-NEXT: vsubps %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; AVX512-NEXT: ret{{[l|q]}} %1 = fsub <4 x float> %a, %b %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> ret <4 x float> %2 } define <4 x float> @insert_test_mul_ss(<4 x float> %a, <4 x float> %b) { -; SSE-LABEL: insert_test_mul_ss: -; SSE: # %bb.0: -; SSE-NEXT: mulss %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test_mul_ss: -; AVX: # %bb.0: -; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test_mul_ss: +; SSE2: # %bb.0: +; SSE2-NEXT: mulps %xmm0, %xmm1 +; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test_mul_ss: +; SSE41: # %bb.0: +; SSE41-NEXT: mulps %xmm0, %xmm1 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test_mul_ss: +; AVX1: # %bb.0: +; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test_mul_ss: +; AVX512: # %bb.0: +; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; AVX512-NEXT: ret{{[l|q]}} %1 = fmul <4 x float> %a, %b %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> ret <4 x float> %2 } define <4 x float> @insert_test_div_ss(<4 x float> %a, <4 x float> %b) { -; SSE-LABEL: insert_test_div_ss: -; SSE: # %bb.0: -; SSE-NEXT: divss %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test_div_ss: -; AVX: # %bb.0: -; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test_div_ss: +; SSE2: # %bb.0: +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: divps %xmm1, %xmm2 +; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3] +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test_div_ss: +; SSE41: # %bb.0: +; SSE41-NEXT: movaps %xmm0, %xmm2 +; SSE41-NEXT: divps %xmm1, %xmm2 +; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3] +; SSE41-NEXT: movaps %xmm2, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test_div_ss: +; AVX1: # %bb.0: +; AVX1-NEXT: vdivps %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test_div_ss: +; AVX512: # %bb.0: +; AVX512-NEXT: vdivps %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; AVX512-NEXT: ret{{[l|q]}} %1 = fdiv <4 x float> %a, %b %2 = shufflevector <4 x float> %1, <4 x float> %a, <4 x i32> ret <4 x float> %2 } define <2 x double> @insert_test_add_sd(<2 x double> %a, <2 x double> %b) { -; SSE-LABEL: insert_test_add_sd: -; SSE: # %bb.0: -; SSE-NEXT: addsd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test_add_sd: -; AVX: # %bb.0: -; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test_add_sd: +; SSE2: # %bb.0: +; SSE2-NEXT: addpd %xmm0, %xmm1 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test_add_sd: +; SSE41: # %bb.0: +; SSE41-NEXT: addpd %xmm0, %xmm1 +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test_add_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: vaddpd %xmm1, %xmm0, %xmm1 
+; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test_add_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX512-NEXT: ret{{[l|q]}} %1 = fadd <2 x double> %a, %b %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> ret <2 x double> %2 } define <2 x double> @insert_test_sub_sd(<2 x double> %a, <2 x double> %b) { -; SSE-LABEL: insert_test_sub_sd: -; SSE: # %bb.0: -; SSE-NEXT: subsd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test_sub_sd: -; AVX: # %bb.0: -; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test_sub_sd: +; SSE2: # %bb.0: +; SSE2-NEXT: movapd %xmm0, %xmm2 +; SSE2-NEXT: subpd %xmm1, %xmm2 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test_sub_sd: +; SSE41: # %bb.0: +; SSE41-NEXT: movapd %xmm0, %xmm2 +; SSE41-NEXT: subpd %xmm1, %xmm2 +; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm0[1] +; SSE41-NEXT: movapd %xmm2, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test_sub_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test_sub_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: vsubpd %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX512-NEXT: ret{{[l|q]}} %1 = fsub <2 x double> %a, %b %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> ret <2 x double> %2 } define <2 x double> @insert_test_mul_sd(<2 x double> %a, <2 x double> %b) { -; SSE-LABEL: insert_test_mul_sd: -; SSE: # %bb.0: -; SSE-NEXT: mulsd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test_mul_sd: -; AVX: # %bb.0: -; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test_mul_sd: +; SSE2: # %bb.0: +; SSE2-NEXT: mulpd %xmm0, %xmm1 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test_mul_sd: +; SSE41: # %bb.0: +; SSE41-NEXT: mulpd %xmm0, %xmm1 +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test_mul_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: vmulpd %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test_mul_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX512-NEXT: ret{{[l|q]}} %1 = fmul <2 x double> %a, %b %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> ret <2 x double> %2 } define <2 x double> @insert_test_div_sd(<2 x double> %a, <2 x double> %b) { -; SSE-LABEL: insert_test_div_sd: -; SSE: # %bb.0: -; SSE-NEXT: divsd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test_div_sd: -; AVX: # %bb.0: -; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test_div_sd: +; SSE2: # %bb.0: +; SSE2-NEXT: movapd %xmm0, %xmm2 +; SSE2-NEXT: divpd %xmm1, %xmm2 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test_div_sd: +; SSE41: # %bb.0: +; SSE41-NEXT: movapd %xmm0, %xmm2 +; SSE41-NEXT: divpd %xmm1, %xmm2 +; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm0[1] +; SSE41-NEXT: movapd %xmm2, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test_div_sd: +; AVX1: # %bb.0: +; 
AVX1-NEXT: vdivpd %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test_div_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: vdivpd %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX512-NEXT: ret{{[l|q]}} %1 = fdiv <2 x double> %a, %b %2 = shufflevector <2 x double> %1, <2 x double> %a, <2 x i32> ret <2 x double> %2 } define <4 x float> @insert_test2_add_ss(<4 x float> %a, <4 x float> %b) { -; SSE-LABEL: insert_test2_add_ss: -; SSE: # %bb.0: -; SSE-NEXT: addss %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test2_add_ss: -; AVX: # %bb.0: -; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test2_add_ss: +; SSE2: # %bb.0: +; SSE2-NEXT: addps %xmm1, %xmm0 +; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] +; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test2_add_ss: +; SSE41: # %bb.0: +; SSE41-NEXT: addps %xmm1, %xmm0 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test2_add_ss: +; AVX1: # %bb.0: +; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test2_add_ss: +; AVX512: # %bb.0: +; AVX512-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX512-NEXT: ret{{[l|q]}} %1 = fadd <4 x float> %b, %a %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> ret <4 x float> %2 } define <4 x float> @insert_test2_sub_ss(<4 x float> %a, <4 x float> %b) { -; SSE-LABEL: insert_test2_sub_ss: -; SSE: # %bb.0: -; SSE-NEXT: subss %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test2_sub_ss: -; AVX: # %bb.0: -; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test2_sub_ss: +; SSE2: # %bb.0: +; SSE2-NEXT: movaps %xmm1, %xmm2 +; SSE2-NEXT: subps %xmm0, %xmm2 +; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] +; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test2_sub_ss: +; SSE41: # %bb.0: +; SSE41-NEXT: movaps %xmm1, %xmm2 +; SSE41-NEXT: subps %xmm0, %xmm2 +; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm1[1,2,3] +; SSE41-NEXT: movaps %xmm2, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test2_sub_ss: +; AVX1: # %bb.0: +; AVX1-NEXT: vsubps %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test2_sub_ss: +; AVX512: # %bb.0: +; AVX512-NEXT: vsubps %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX512-NEXT: ret{{[l|q]}} %1 = fsub <4 x float> %b, %a %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> ret <4 x float> %2 } define <4 x float> @insert_test2_mul_ss(<4 x float> %a, <4 x float> %b) { -; SSE-LABEL: insert_test2_mul_ss: -; SSE: # %bb.0: -; SSE-NEXT: mulss %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test2_mul_ss: -; AVX: # %bb.0: -; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test2_mul_ss: +; SSE2: # %bb.0: +; SSE2-NEXT: mulps %xmm1, %xmm0 +; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] +; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test2_mul_ss: +; SSE41: # %bb.0: +; 
SSE41-NEXT: mulps %xmm1, %xmm0 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test2_mul_ss: +; AVX1: # %bb.0: +; AVX1-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test2_mul_ss: +; AVX512: # %bb.0: +; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX512-NEXT: ret{{[l|q]}} %1 = fmul <4 x float> %b, %a %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> ret <4 x float> %2 } define <4 x float> @insert_test2_div_ss(<4 x float> %a, <4 x float> %b) { -; SSE-LABEL: insert_test2_div_ss: -; SSE: # %bb.0: -; SSE-NEXT: divss %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test2_div_ss: -; AVX: # %bb.0: -; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test2_div_ss: +; SSE2: # %bb.0: +; SSE2-NEXT: movaps %xmm1, %xmm2 +; SSE2-NEXT: divps %xmm0, %xmm2 +; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] +; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test2_div_ss: +; SSE41: # %bb.0: +; SSE41-NEXT: movaps %xmm1, %xmm2 +; SSE41-NEXT: divps %xmm0, %xmm2 +; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm1[1,2,3] +; SSE41-NEXT: movaps %xmm2, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test2_div_ss: +; AVX1: # %bb.0: +; AVX1-NEXT: vdivps %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test2_div_ss: +; AVX512: # %bb.0: +; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX512-NEXT: ret{{[l|q]}} %1 = fdiv <4 x float> %b, %a %2 = shufflevector <4 x float> %1, <4 x float> %b, <4 x i32> ret <4 x float> %2 } define <2 x double> @insert_test2_add_sd(<2 x double> %a, <2 x double> %b) { -; SSE-LABEL: insert_test2_add_sd: -; SSE: # %bb.0: -; SSE-NEXT: addsd %xmm0, %xmm1 -; SSE-NEXT: movapd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test2_add_sd: -; AVX: # %bb.0: -; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test2_add_sd: +; SSE2: # %bb.0: +; SSE2-NEXT: addpd %xmm1, %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test2_add_sd: +; SSE41: # %bb.0: +; SSE41-NEXT: addpd %xmm1, %xmm0 +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test2_add_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test2_add_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: vaddpd %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX512-NEXT: ret{{[l|q]}} %1 = fadd <2 x double> %b, %a %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> ret <2 x double> %2 } define <2 x double> @insert_test2_sub_sd(<2 x double> %a, <2 x double> %b) { -; SSE-LABEL: insert_test2_sub_sd: -; SSE: # %bb.0: -; SSE-NEXT: subsd %xmm0, %xmm1 -; SSE-NEXT: movapd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test2_sub_sd: -; AVX: # %bb.0: -; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test2_sub_sd: +; SSE2: # %bb.0: +; SSE2-NEXT: movapd %xmm1, %xmm2 +; 
SSE2-NEXT: subpd %xmm0, %xmm2 +; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test2_sub_sd: +; SSE41: # %bb.0: +; SSE41-NEXT: movapd %xmm1, %xmm2 +; SSE41-NEXT: subpd %xmm0, %xmm2 +; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm1[1] +; SSE41-NEXT: movapd %xmm2, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test2_sub_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: vsubpd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test2_sub_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: vsubpd %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX512-NEXT: ret{{[l|q]}} %1 = fsub <2 x double> %b, %a %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> ret <2 x double> %2 } define <2 x double> @insert_test2_mul_sd(<2 x double> %a, <2 x double> %b) { -; SSE-LABEL: insert_test2_mul_sd: -; SSE: # %bb.0: -; SSE-NEXT: mulsd %xmm0, %xmm1 -; SSE-NEXT: movapd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test2_mul_sd: -; AVX: # %bb.0: -; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test2_mul_sd: +; SSE2: # %bb.0: +; SSE2-NEXT: mulpd %xmm1, %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test2_mul_sd: +; SSE41: # %bb.0: +; SSE41-NEXT: mulpd %xmm1, %xmm0 +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test2_mul_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: vmulpd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test2_mul_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: vmulpd %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX512-NEXT: ret{{[l|q]}} %1 = fmul <2 x double> %b, %a %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> ret <2 x double> %2 } define <2 x double> @insert_test2_div_sd(<2 x double> %a, <2 x double> %b) { -; SSE-LABEL: insert_test2_div_sd: -; SSE: # %bb.0: -; SSE-NEXT: divsd %xmm0, %xmm1 -; SSE-NEXT: movapd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test2_div_sd: -; AVX: # %bb.0: -; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test2_div_sd: +; SSE2: # %bb.0: +; SSE2-NEXT: movapd %xmm1, %xmm2 +; SSE2-NEXT: divpd %xmm0, %xmm2 +; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test2_div_sd: +; SSE41: # %bb.0: +; SSE41-NEXT: movapd %xmm1, %xmm2 +; SSE41-NEXT: divpd %xmm0, %xmm2 +; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm1[1] +; SSE41-NEXT: movapd %xmm2, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test2_div_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: vdivpd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test2_div_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: vdivpd %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX512-NEXT: ret{{[l|q]}} %1 = fdiv <2 x double> %b, %a %2 = shufflevector <2 x double> %1, <2 x double> %b, <2 x i32> ret <2 x double> %2 } define <4 x float> @insert_test3_add_ss(<4 x float> %a, <4 x float> %b) { -; SSE-LABEL: insert_test3_add_ss: -; SSE: # %bb.0: -; SSE-NEXT: addss %xmm1, %xmm0 -; SSE-NEXT: 
ret{{[l|q]}} -; -; AVX-LABEL: insert_test3_add_ss: -; AVX: # %bb.0: -; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test3_add_ss: +; SSE2: # %bb.0: +; SSE2-NEXT: addps %xmm0, %xmm1 +; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test3_add_ss: +; SSE41: # %bb.0: +; SSE41-NEXT: addps %xmm0, %xmm1 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test3_add_ss: +; AVX1: # %bb.0: +; AVX1-NEXT: vaddps %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test3_add_ss: +; AVX512: # %bb.0: +; AVX512-NEXT: vaddps %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; AVX512-NEXT: ret{{[l|q]}} %1 = fadd <4 x float> %a, %b %2 = select <4 x i1> , <4 x float> %a, <4 x float> %1 ret <4 x float> %2 } define <4 x float> @insert_test3_sub_ss(<4 x float> %a, <4 x float> %b) { -; SSE-LABEL: insert_test3_sub_ss: -; SSE: # %bb.0: -; SSE-NEXT: subss %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test3_sub_ss: -; AVX: # %bb.0: -; AVX-NEXT: vsubss %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test3_sub_ss: +; SSE2: # %bb.0: +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: subps %xmm1, %xmm2 +; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3] +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test3_sub_ss: +; SSE41: # %bb.0: +; SSE41-NEXT: movaps %xmm0, %xmm2 +; SSE41-NEXT: subps %xmm1, %xmm2 +; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3] +; SSE41-NEXT: movaps %xmm2, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test3_sub_ss: +; AVX1: # %bb.0: +; AVX1-NEXT: vsubps %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test3_sub_ss: +; AVX512: # %bb.0: +; AVX512-NEXT: vsubps %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; AVX512-NEXT: ret{{[l|q]}} %1 = fsub <4 x float> %a, %b %2 = select <4 x i1> , <4 x float> %a, <4 x float> %1 ret <4 x float> %2 } define <4 x float> @insert_test3_mul_ss(<4 x float> %a, <4 x float> %b) { -; SSE-LABEL: insert_test3_mul_ss: -; SSE: # %bb.0: -; SSE-NEXT: mulss %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test3_mul_ss: -; AVX: # %bb.0: -; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test3_mul_ss: +; SSE2: # %bb.0: +; SSE2-NEXT: mulps %xmm0, %xmm1 +; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test3_mul_ss: +; SSE41: # %bb.0: +; SSE41-NEXT: mulps %xmm0, %xmm1 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test3_mul_ss: +; AVX1: # %bb.0: +; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test3_mul_ss: +; AVX512: # %bb.0: +; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; AVX512-NEXT: ret{{[l|q]}} %1 = fmul <4 x float> %a, %b %2 = select <4 x i1> , <4 x float> %a, <4 x float> %1 ret <4 x float> %2 } define <4 x float> @insert_test3_div_ss(<4 x float> %a, <4 x float> %b) { -; SSE-LABEL: insert_test3_div_ss: -; SSE: # %bb.0: -; SSE-NEXT: divss %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: 
insert_test3_div_ss: -; AVX: # %bb.0: -; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test3_div_ss: +; SSE2: # %bb.0: +; SSE2-NEXT: movaps %xmm0, %xmm2 +; SSE2-NEXT: divps %xmm1, %xmm2 +; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3] +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test3_div_ss: +; SSE41: # %bb.0: +; SSE41-NEXT: movaps %xmm0, %xmm2 +; SSE41-NEXT: divps %xmm1, %xmm2 +; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm0[1,2,3] +; SSE41-NEXT: movaps %xmm2, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test3_div_ss: +; AVX1: # %bb.0: +; AVX1-NEXT: vdivps %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test3_div_ss: +; AVX512: # %bb.0: +; AVX512-NEXT: vdivps %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] +; AVX512-NEXT: ret{{[l|q]}} %1 = fdiv <4 x float> %a, %b %2 = select <4 x i1> , <4 x float> %a, <4 x float> %1 ret <4 x float> %2 } define <2 x double> @insert_test3_add_sd(<2 x double> %a, <2 x double> %b) { -; SSE-LABEL: insert_test3_add_sd: -; SSE: # %bb.0: -; SSE-NEXT: addsd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test3_add_sd: -; AVX: # %bb.0: -; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test3_add_sd: +; SSE2: # %bb.0: +; SSE2-NEXT: addpd %xmm0, %xmm1 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test3_add_sd: +; SSE41: # %bb.0: +; SSE41-NEXT: addpd %xmm0, %xmm1 +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test3_add_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: vaddpd %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test3_add_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX512-NEXT: ret{{[l|q]}} %1 = fadd <2 x double> %a, %b %2 = select <2 x i1> , <2 x double> %a, <2 x double> %1 ret <2 x double> %2 } define <2 x double> @insert_test3_sub_sd(<2 x double> %a, <2 x double> %b) { -; SSE-LABEL: insert_test3_sub_sd: -; SSE: # %bb.0: -; SSE-NEXT: subsd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test3_sub_sd: -; AVX: # %bb.0: -; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test3_sub_sd: +; SSE2: # %bb.0: +; SSE2-NEXT: movapd %xmm0, %xmm2 +; SSE2-NEXT: subpd %xmm1, %xmm2 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test3_sub_sd: +; SSE41: # %bb.0: +; SSE41-NEXT: movapd %xmm0, %xmm2 +; SSE41-NEXT: subpd %xmm1, %xmm2 +; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm0[1] +; SSE41-NEXT: movapd %xmm2, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test3_sub_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test3_sub_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: vsubpd %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX512-NEXT: ret{{[l|q]}} %1 = fsub <2 x double> %a, %b %2 = select <2 x i1> , <2 x double> %a, <2 x double> %1 ret <2 x double> %2 } define <2 x double> @insert_test3_mul_sd(<2 x double> %a, <2 x double> %b) { -; SSE-LABEL: insert_test3_mul_sd: -; SSE: # %bb.0: -; SSE-NEXT: mulsd %xmm1, %xmm0 
-; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test3_mul_sd: -; AVX: # %bb.0: -; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test3_mul_sd: +; SSE2: # %bb.0: +; SSE2-NEXT: mulpd %xmm0, %xmm1 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test3_mul_sd: +; SSE41: # %bb.0: +; SSE41-NEXT: mulpd %xmm0, %xmm1 +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test3_mul_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: vmulpd %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test3_mul_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX512-NEXT: ret{{[l|q]}} %1 = fmul <2 x double> %a, %b %2 = select <2 x i1> , <2 x double> %a, <2 x double> %1 ret <2 x double> %2 } define <2 x double> @insert_test3_div_sd(<2 x double> %a, <2 x double> %b) { -; SSE-LABEL: insert_test3_div_sd: -; SSE: # %bb.0: -; SSE-NEXT: divsd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test3_div_sd: -; AVX: # %bb.0: -; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test3_div_sd: +; SSE2: # %bb.0: +; SSE2-NEXT: movapd %xmm0, %xmm2 +; SSE2-NEXT: divpd %xmm1, %xmm2 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test3_div_sd: +; SSE41: # %bb.0: +; SSE41-NEXT: movapd %xmm0, %xmm2 +; SSE41-NEXT: divpd %xmm1, %xmm2 +; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm0[1] +; SSE41-NEXT: movapd %xmm2, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test3_div_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: vdivpd %xmm1, %xmm0, %xmm1 +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test3_div_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: vdivpd %xmm1, %xmm0, %xmm1 +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] +; AVX512-NEXT: ret{{[l|q]}} %1 = fdiv <2 x double> %a, %b %2 = select <2 x i1> , <2 x double> %a, <2 x double> %1 ret <2 x double> %2 } define <4 x float> @insert_test4_add_ss(<4 x float> %a, <4 x float> %b) { -; SSE-LABEL: insert_test4_add_ss: -; SSE: # %bb.0: -; SSE-NEXT: addss %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test4_add_ss: -; AVX: # %bb.0: -; AVX-NEXT: vaddss %xmm0, %xmm1, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test4_add_ss: +; SSE2: # %bb.0: +; SSE2-NEXT: addps %xmm1, %xmm0 +; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] +; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test4_add_ss: +; SSE41: # %bb.0: +; SSE41-NEXT: addps %xmm1, %xmm0 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test4_add_ss: +; AVX1: # %bb.0: +; AVX1-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test4_add_ss: +; AVX512: # %bb.0: +; AVX512-NEXT: vaddps %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX512-NEXT: ret{{[l|q]}} %1 = fadd <4 x float> %b, %a %2 = select <4 x i1> , <4 x float> %b, <4 x float> %1 ret <4 x float> %2 } define <4 x float> @insert_test4_sub_ss(<4 x float> %a, <4 x float> %b) { -; SSE-LABEL: insert_test4_sub_ss: -; SSE: # %bb.0: -; SSE-NEXT: subss %xmm0, %xmm1 
-; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test4_sub_ss: -; AVX: # %bb.0: -; AVX-NEXT: vsubss %xmm0, %xmm1, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test4_sub_ss: +; SSE2: # %bb.0: +; SSE2-NEXT: movaps %xmm1, %xmm2 +; SSE2-NEXT: subps %xmm0, %xmm2 +; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] +; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test4_sub_ss: +; SSE41: # %bb.0: +; SSE41-NEXT: movaps %xmm1, %xmm2 +; SSE41-NEXT: subps %xmm0, %xmm2 +; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm1[1,2,3] +; SSE41-NEXT: movaps %xmm2, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test4_sub_ss: +; AVX1: # %bb.0: +; AVX1-NEXT: vsubps %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test4_sub_ss: +; AVX512: # %bb.0: +; AVX512-NEXT: vsubps %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX512-NEXT: ret{{[l|q]}} %1 = fsub <4 x float> %b, %a %2 = select <4 x i1> , <4 x float> %b, <4 x float> %1 ret <4 x float> %2 } define <4 x float> @insert_test4_mul_ss(<4 x float> %a, <4 x float> %b) { -; SSE-LABEL: insert_test4_mul_ss: -; SSE: # %bb.0: -; SSE-NEXT: mulss %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test4_mul_ss: -; AVX: # %bb.0: -; AVX-NEXT: vmulss %xmm0, %xmm1, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test4_mul_ss: +; SSE2: # %bb.0: +; SSE2-NEXT: mulps %xmm1, %xmm0 +; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] +; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test4_mul_ss: +; SSE41: # %bb.0: +; SSE41-NEXT: mulps %xmm1, %xmm0 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test4_mul_ss: +; AVX1: # %bb.0: +; AVX1-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test4_mul_ss: +; AVX512: # %bb.0: +; AVX512-NEXT: vmulps %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX512-NEXT: ret{{[l|q]}} %1 = fmul <4 x float> %b, %a %2 = select <4 x i1> , <4 x float> %b, <4 x float> %1 ret <4 x float> %2 } define <4 x float> @insert_test4_div_ss(<4 x float> %a, <4 x float> %b) { -; SSE-LABEL: insert_test4_div_ss: -; SSE: # %bb.0: -; SSE-NEXT: divss %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test4_div_ss: -; AVX: # %bb.0: -; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test4_div_ss: +; SSE2: # %bb.0: +; SSE2-NEXT: movaps %xmm1, %xmm2 +; SSE2-NEXT: divps %xmm0, %xmm2 +; SSE2-NEXT: movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3] +; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test4_div_ss: +; SSE41: # %bb.0: +; SSE41-NEXT: movaps %xmm1, %xmm2 +; SSE41-NEXT: divps %xmm0, %xmm2 +; SSE41-NEXT: blendps {{.*#+}} xmm2 = xmm2[0],xmm1[1,2,3] +; SSE41-NEXT: movaps %xmm2, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test4_div_ss: +; AVX1: # %bb.0: +; AVX1-NEXT: vdivps %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test4_div_ss: +; AVX512: # %bb.0: +; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3] +; AVX512-NEXT: ret{{[l|q]}} %1 
= fdiv <4 x float> %b, %a %2 = select <4 x i1> , <4 x float> %b, <4 x float> %1 ret <4 x float> %2 } define <2 x double> @insert_test4_add_sd(<2 x double> %a, <2 x double> %b) { -; SSE-LABEL: insert_test4_add_sd: -; SSE: # %bb.0: -; SSE-NEXT: addsd %xmm0, %xmm1 -; SSE-NEXT: movapd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test4_add_sd: -; AVX: # %bb.0: -; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test4_add_sd: +; SSE2: # %bb.0: +; SSE2-NEXT: addpd %xmm1, %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test4_add_sd: +; SSE41: # %bb.0: +; SSE41-NEXT: addpd %xmm1, %xmm0 +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test4_add_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test4_add_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: vaddpd %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX512-NEXT: ret{{[l|q]}} %1 = fadd <2 x double> %b, %a %2 = select <2 x i1> , <2 x double> %b, <2 x double> %1 ret <2 x double> %2 } define <2 x double> @insert_test4_sub_sd(<2 x double> %a, <2 x double> %b) { -; SSE-LABEL: insert_test4_sub_sd: -; SSE: # %bb.0: -; SSE-NEXT: subsd %xmm0, %xmm1 -; SSE-NEXT: movapd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test4_sub_sd: -; AVX: # %bb.0: -; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test4_sub_sd: +; SSE2: # %bb.0: +; SSE2-NEXT: movapd %xmm1, %xmm2 +; SSE2-NEXT: subpd %xmm0, %xmm2 +; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test4_sub_sd: +; SSE41: # %bb.0: +; SSE41-NEXT: movapd %xmm1, %xmm2 +; SSE41-NEXT: subpd %xmm0, %xmm2 +; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm1[1] +; SSE41-NEXT: movapd %xmm2, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test4_sub_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: vsubpd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test4_sub_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: vsubpd %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX512-NEXT: ret{{[l|q]}} %1 = fsub <2 x double> %b, %a %2 = select <2 x i1> , <2 x double> %b, <2 x double> %1 ret <2 x double> %2 } define <2 x double> @insert_test4_mul_sd(<2 x double> %a, <2 x double> %b) { -; SSE-LABEL: insert_test4_mul_sd: -; SSE: # %bb.0: -; SSE-NEXT: mulsd %xmm0, %xmm1 -; SSE-NEXT: movapd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test4_mul_sd: -; AVX: # %bb.0: -; AVX-NEXT: vmulsd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test4_mul_sd: +; SSE2: # %bb.0: +; SSE2-NEXT: mulpd %xmm1, %xmm0 +; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm0[0],xmm1[1] +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test4_mul_sd: +; SSE41: # %bb.0: +; SSE41-NEXT: mulpd %xmm1, %xmm0 +; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test4_mul_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: vmulpd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test4_mul_sd: +; AVX512: # %bb.0: +; 
AVX512-NEXT: vmulpd %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX512-NEXT: ret{{[l|q]}} %1 = fmul <2 x double> %b, %a %2 = select <2 x i1> , <2 x double> %b, <2 x double> %1 ret <2 x double> %2 } define <2 x double> @insert_test4_div_sd(<2 x double> %a, <2 x double> %b) { -; SSE-LABEL: insert_test4_div_sd: -; SSE: # %bb.0: -; SSE-NEXT: divsd %xmm0, %xmm1 -; SSE-NEXT: movapd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} -; -; AVX-LABEL: insert_test4_div_sd: -; AVX: # %bb.0: -; AVX-NEXT: vdivsd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; SSE2-LABEL: insert_test4_div_sd: +; SSE2: # %bb.0: +; SSE2-NEXT: movapd %xmm1, %xmm2 +; SSE2-NEXT: divpd %xmm0, %xmm2 +; SSE2-NEXT: movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1] +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: ret{{[l|q]}} +; +; SSE41-LABEL: insert_test4_div_sd: +; SSE41: # %bb.0: +; SSE41-NEXT: movapd %xmm1, %xmm2 +; SSE41-NEXT: divpd %xmm0, %xmm2 +; SSE41-NEXT: blendpd {{.*#+}} xmm2 = xmm2[0],xmm1[1] +; SSE41-NEXT: movapd %xmm2, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX1-LABEL: insert_test4_div_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: vdivpd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX1-NEXT: ret{{[l|q]}} +; +; AVX512-LABEL: insert_test4_div_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: vdivpd %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1] +; AVX512-NEXT: ret{{[l|q]}} %1 = fdiv <2 x double> %b, %a %2 = select <2 x i1> , <2 x double> %b, <2 x double> %1 ret <2 x double> %2 -- 2.50.1
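For reference, the IR shape matched by the removed patterns is the one exercised by the insert_test_* functions above. A minimal standalone sketch of the case the commit message describes (the function name here is hypothetical; the shuffle mask is written out in full and matches the insert_test_add_ss pattern):

; A packed fadd whose result is narrowed to lane 0 by a movss-style shuffle.
; The removed isel patterns folded this pair into addss/vaddss; with this
; change the packed addps/vaddps is kept and followed by a movss/blend, as
; shown in the updated CHECK lines above. Once more of the FP environment is
; modeled, keeping the packed op matters because lanes 1-3 of the IR-level
; fadd could raise exceptions that a lone addss would never signal.
define <4 x float> @packed_add_then_movss(<4 x float> %a, <4 x float> %b) {
  %sum = fadd <4 x float> %a, %b
  %res = shufflevector <4 x float> %sum, <4 x float> %a, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
  ret <4 x float> %res
}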