From 06a2bf816bdb9681bd250e38c27b6545ea15c051 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Wed, 23 Nov 2016 21:19:57 +0000
Subject: [PATCH] [X86][SSE] Add v2i64 -> v2f32 + zero codegen test

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@287813 91177308-0d34-0410-b5e6-96231b3b80d8
---
 test/CodeGen/X86/vec_int_to_fp.ll | 610 +++++++++++++++++++-----------
 1 file changed, 390 insertions(+), 220 deletions(-)

diff --git a/test/CodeGen/X86/vec_int_to_fp.ll b/test/CodeGen/X86/vec_int_to_fp.ll
index 66fa03dd3ba..a2be5995c25 100644
--- a/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/test/CodeGen/X86/vec_int_to_fp.ll
@@ -1131,6 +1131,66 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) {
 ret <4 x float> %ext
 }
 
+define <4 x float> @sitofp_2i64_to_4f32_zero(<2 x i64> %a) {
+; SSE-LABEL: sitofp_2i64_to_4f32_zero:
+; SSE: # BB#0:
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; SSE-NEXT: movd %xmm1, %rax
+; SSE-NEXT: xorps %xmm1, %xmm1
+; SSE-NEXT: cvtsi2ssq %rax, %xmm1
+; SSE-NEXT: movd %xmm0, %rax
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: cvtsi2ssq %rax, %xmm0
+; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
+; SSE-NEXT: retq
+;
+; VEX-LABEL: sitofp_2i64_to_4f32_zero:
+; VEX: # BB#0:
+; VEX-NEXT: vpextrq $1, %xmm0, %rax
+; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
+; VEX-NEXT: vmovq %xmm0, %rax
+; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
+; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; VEX-NEXT: retq
+;
+; AVX512F-LABEL: sitofp_2i64_to_4f32_zero:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
+; AVX512F-NEXT: vmovq %xmm0, %rax
+; AVX512F-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
+; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: sitofp_2i64_to_4f32_zero:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax
+; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
+; AVX512VL-NEXT: vmovq %xmm0, %rax
+; AVX512VL-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0
+; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
+; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512VL-NEXT: retq
+;
+; AVX512DQ-LABEL: sitofp_2i64_to_4f32_zero:
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %ZMM0
+; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
+; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512DQ-NEXT: retq
+;
+; AVX512VLDQ-LABEL: sitofp_2i64_to_4f32_zero:
+; AVX512VLDQ: # BB#0:
+; AVX512VLDQ-NEXT: # kill: %XMM0 %XMM0 %YMM0
+; AVX512VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0
+; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
+; AVX512VLDQ-NEXT: retq
+ %cvt = sitofp <2 x i64> %a to <2 x float>
+ %ext = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %ext
+}
+
 define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) {
 ; SSE-LABEL: sitofp_4i64_to_4f32_undef:
 ; SSE: # BB#0:
@@ -1572,12 +1632,12 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
 ; SSE-NEXT: movdqa %xmm0, %xmm1
 ; SSE-NEXT: movd %xmm1, %rax
 ; SSE-NEXT: testq %rax, %rax
-; SSE-NEXT: js .LBB38_1
+; SSE-NEXT: js .LBB39_1
 ; SSE-NEXT: # BB#2:
 ; SSE-NEXT: xorps %xmm0, %xmm0
 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0
-; SSE-NEXT: jmp .LBB38_3
-; SSE-NEXT: .LBB38_1:
+; SSE-NEXT: jmp .LBB39_3
+; SSE-NEXT: .LBB39_1:
 ; SSE-NEXT: movq %rax, %rcx
 ; SSE-NEXT: shrq %rcx
 ; SSE-NEXT: andl $1, %eax
@@ -1585,17 +1645,17 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) {
 ; SSE-NEXT: xorps 
%xmm0, %xmm0 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0 ; SSE-NEXT: addss %xmm0, %xmm0 -; SSE-NEXT: .LBB38_3: +; SSE-NEXT: .LBB39_3: ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] ; SSE-NEXT: movd %xmm1, %rax ; SSE-NEXT: testq %rax, %rax -; SSE-NEXT: js .LBB38_4 +; SSE-NEXT: js .LBB39_4 ; SSE-NEXT: # BB#5: ; SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE-NEXT: retq -; SSE-NEXT: .LBB38_4: +; SSE-NEXT: .LBB39_4: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax @@ -1610,39 +1670,39 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) { ; VEX: # BB#0: ; VEX-NEXT: vpextrq $1, %xmm0, %rax ; VEX-NEXT: testq %rax, %rax -; VEX-NEXT: js .LBB38_1 +; VEX-NEXT: js .LBB39_1 ; VEX-NEXT: # BB#2: ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 -; VEX-NEXT: jmp .LBB38_3 -; VEX-NEXT: .LBB38_1: +; VEX-NEXT: jmp .LBB39_3 +; VEX-NEXT: .LBB39_1: ; VEX-NEXT: movq %rax, %rcx ; VEX-NEXT: shrq %rcx ; VEX-NEXT: andl $1, %eax ; VEX-NEXT: orq %rcx, %rax ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; VEX-NEXT: vaddss %xmm1, %xmm1, %xmm1 -; VEX-NEXT: .LBB38_3: +; VEX-NEXT: .LBB39_3: ; VEX-NEXT: vmovq %xmm0, %rax ; VEX-NEXT: testq %rax, %rax -; VEX-NEXT: js .LBB38_4 +; VEX-NEXT: js .LBB39_4 ; VEX-NEXT: # BB#5: ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0 -; VEX-NEXT: jmp .LBB38_6 -; VEX-NEXT: .LBB38_4: +; VEX-NEXT: jmp .LBB39_6 +; VEX-NEXT: .LBB39_4: ; VEX-NEXT: movq %rax, %rcx ; VEX-NEXT: shrq %rcx ; VEX-NEXT: andl $1, %eax ; VEX-NEXT: orq %rcx, %rax ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0 ; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0 -; VEX-NEXT: .LBB38_6: +; VEX-NEXT: .LBB39_6: ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] ; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; VEX-NEXT: testq %rax, %rax -; VEX-NEXT: js .LBB38_8 +; VEX-NEXT: js .LBB39_8 ; VEX-NEXT: # BB#7: ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1 -; VEX-NEXT: .LBB38_8: +; VEX-NEXT: .LBB39_8: ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] ; VEX-NEXT: retq @@ -1688,25 +1748,135 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) { ret <4 x float> %ext } +define <4 x float> @uitofp_2i64_to_2f32(<2 x i64> %a) { +; SSE-LABEL: uitofp_2i64_to_2f32: +; SSE: # BB#0: +; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] +; SSE-NEXT: movd %xmm1, %rax +; SSE-NEXT: testq %rax, %rax +; SSE-NEXT: js .LBB40_1 +; SSE-NEXT: # BB#2: +; SSE-NEXT: xorps %xmm1, %xmm1 +; SSE-NEXT: cvtsi2ssq %rax, %xmm1 +; SSE-NEXT: jmp .LBB40_3 +; SSE-NEXT: .LBB40_1: +; SSE-NEXT: movq %rax, %rcx +; SSE-NEXT: shrq %rcx +; SSE-NEXT: andl $1, %eax +; SSE-NEXT: orq %rcx, %rax +; SSE-NEXT: xorps %xmm1, %xmm1 +; SSE-NEXT: cvtsi2ssq %rax, %xmm1 +; SSE-NEXT: addss %xmm1, %xmm1 +; SSE-NEXT: .LBB40_3: +; SSE-NEXT: movd %xmm0, %rax +; SSE-NEXT: testq %rax, %rax +; SSE-NEXT: js .LBB40_4 +; SSE-NEXT: # BB#5: +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: cvtsi2ssq %rax, %xmm0 +; SSE-NEXT: jmp .LBB40_6 +; SSE-NEXT: .LBB40_4: +; SSE-NEXT: movq %rax, %rcx +; SSE-NEXT: shrq %rcx +; SSE-NEXT: andl $1, %eax +; SSE-NEXT: orq %rcx, %rax +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: cvtsi2ssq %rax, %xmm0 +; SSE-NEXT: addss %xmm0, %xmm0 +; SSE-NEXT: .LBB40_6: +; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero +; SSE-NEXT: retq +; +; VEX-LABEL: uitofp_2i64_to_2f32: +; VEX: # BB#0: +; VEX-NEXT: vpextrq $1, %xmm0, %rax +; VEX-NEXT: testq %rax, %rax +; VEX-NEXT: js .LBB40_1 +; 
VEX-NEXT: # BB#2: +; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 +; VEX-NEXT: jmp .LBB40_3 +; VEX-NEXT: .LBB40_1: +; VEX-NEXT: movq %rax, %rcx +; VEX-NEXT: shrq %rcx +; VEX-NEXT: andl $1, %eax +; VEX-NEXT: orq %rcx, %rax +; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 +; VEX-NEXT: vaddss %xmm1, %xmm1, %xmm1 +; VEX-NEXT: .LBB40_3: +; VEX-NEXT: vmovq %xmm0, %rax +; VEX-NEXT: testq %rax, %rax +; VEX-NEXT: js .LBB40_4 +; VEX-NEXT: # BB#5: +; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0 +; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero +; VEX-NEXT: retq +; VEX-NEXT: .LBB40_4: +; VEX-NEXT: movq %rax, %rcx +; VEX-NEXT: shrq %rcx +; VEX-NEXT: andl $1, %eax +; VEX-NEXT: orq %rcx, %rax +; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0 +; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0 +; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero +; VEX-NEXT: retq +; +; AVX512F-LABEL: uitofp_2i64_to_2f32: +; AVX512F: # BB#0: +; AVX512F-NEXT: vpextrq $1, %xmm0, %rax +; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1 +; AVX512F-NEXT: vmovq %xmm0, %rax +; AVX512F-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0 +; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: uitofp_2i64_to_2f32: +; AVX512VL: # BB#0: +; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax +; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1 +; AVX512VL-NEXT: vmovq %xmm0, %rax +; AVX512VL-NEXT: vcvtusi2ssq %rax, %xmm2, %xmm0 +; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; AVX512VL-NEXT: retq +; +; AVX512DQ-LABEL: uitofp_2i64_to_2f32: +; AVX512DQ: # BB#0: +; AVX512DQ-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0 +; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; AVX512DQ-NEXT: retq +; +; AVX512VLDQ-LABEL: uitofp_2i64_to_2f32: +; AVX512VLDQ: # BB#0: +; AVX512VLDQ-NEXT: # kill: %XMM0 %XMM0 %YMM0 +; AVX512VLDQ-NEXT: vcvtuqq2ps %ymm0, %xmm0 +; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; AVX512VLDQ-NEXT: retq + %cvt = uitofp <2 x i64> %a to <2 x float> + %ext = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> + ret <4 x float> %ext +} + define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; SSE-LABEL: uitofp_4i64_to_4f32_undef: ; SSE: # BB#0: ; SSE-NEXT: movdqa %xmm0, %xmm1 ; SSE-NEXT: testq %rax, %rax ; SSE-NEXT: xorps %xmm2, %xmm2 -; SSE-NEXT: js .LBB39_2 +; SSE-NEXT: js .LBB41_2 ; SSE-NEXT: # BB#1: ; SSE-NEXT: xorps %xmm2, %xmm2 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2 -; SSE-NEXT: .LBB39_2: +; SSE-NEXT: .LBB41_2: ; SSE-NEXT: movd %xmm1, %rax ; SSE-NEXT: testq %rax, %rax -; SSE-NEXT: js .LBB39_3 +; SSE-NEXT: js .LBB41_3 ; SSE-NEXT: # BB#4: ; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0 -; SSE-NEXT: jmp .LBB39_5 -; SSE-NEXT: .LBB39_3: +; SSE-NEXT: jmp .LBB41_5 +; SSE-NEXT: .LBB41_3: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax @@ -1714,17 +1884,17 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0 ; SSE-NEXT: addss %xmm0, %xmm0 -; SSE-NEXT: .LBB39_5: +; SSE-NEXT: .LBB41_5: ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] ; SSE-NEXT: movd %xmm1, %rax ; SSE-NEXT: testq %rax, %rax -; SSE-NEXT: js .LBB39_6 +; SSE-NEXT: js .LBB41_6 ; SSE-NEXT: # BB#7: ; SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1 -; SSE-NEXT: jmp .LBB39_8 -; SSE-NEXT: .LBB39_6: +; SSE-NEXT: jmp .LBB41_8 +; 
SSE-NEXT: .LBB41_6: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax @@ -1732,7 +1902,7 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1 ; SSE-NEXT: addss %xmm1, %xmm1 -; SSE-NEXT: .LBB39_8: +; SSE-NEXT: .LBB41_8: ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE-NEXT: retq @@ -1741,39 +1911,39 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; VEX: # BB#0: ; VEX-NEXT: vpextrq $1, %xmm0, %rax ; VEX-NEXT: testq %rax, %rax -; VEX-NEXT: js .LBB39_1 +; VEX-NEXT: js .LBB41_1 ; VEX-NEXT: # BB#2: ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 -; VEX-NEXT: jmp .LBB39_3 -; VEX-NEXT: .LBB39_1: +; VEX-NEXT: jmp .LBB41_3 +; VEX-NEXT: .LBB41_1: ; VEX-NEXT: movq %rax, %rcx ; VEX-NEXT: shrq %rcx ; VEX-NEXT: andl $1, %eax ; VEX-NEXT: orq %rcx, %rax ; VEX-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; VEX-NEXT: vaddss %xmm1, %xmm1, %xmm1 -; VEX-NEXT: .LBB39_3: +; VEX-NEXT: .LBB41_3: ; VEX-NEXT: vmovq %xmm0, %rax ; VEX-NEXT: testq %rax, %rax -; VEX-NEXT: js .LBB39_4 +; VEX-NEXT: js .LBB41_4 ; VEX-NEXT: # BB#5: ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0 -; VEX-NEXT: jmp .LBB39_6 -; VEX-NEXT: .LBB39_4: +; VEX-NEXT: jmp .LBB41_6 +; VEX-NEXT: .LBB41_4: ; VEX-NEXT: movq %rax, %rcx ; VEX-NEXT: shrq %rcx ; VEX-NEXT: andl $1, %eax ; VEX-NEXT: orq %rcx, %rax ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm0 ; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0 -; VEX-NEXT: .LBB39_6: +; VEX-NEXT: .LBB41_6: ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] ; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; VEX-NEXT: testq %rax, %rax -; VEX-NEXT: js .LBB39_8 +; VEX-NEXT: js .LBB41_8 ; VEX-NEXT: # BB#7: ; VEX-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm1 -; VEX-NEXT: .LBB39_8: +; VEX-NEXT: .LBB41_8: ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] ; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] ; VEX-NEXT: retq @@ -1998,41 +2168,41 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) { ; SSE: # BB#0: ; SSE-NEXT: movd %xmm1, %rax ; SSE-NEXT: testq %rax, %rax -; SSE-NEXT: js .LBB45_1 +; SSE-NEXT: js .LBB47_1 ; SSE-NEXT: # BB#2: ; SSE-NEXT: cvtsi2ssq %rax, %xmm3 -; SSE-NEXT: jmp .LBB45_3 -; SSE-NEXT: .LBB45_1: +; SSE-NEXT: jmp .LBB47_3 +; SSE-NEXT: .LBB47_1: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax ; SSE-NEXT: orq %rcx, %rax ; SSE-NEXT: cvtsi2ssq %rax, %xmm3 ; SSE-NEXT: addss %xmm3, %xmm3 -; SSE-NEXT: .LBB45_3: +; SSE-NEXT: .LBB47_3: ; SSE-NEXT: movd %xmm0, %rax ; SSE-NEXT: testq %rax, %rax -; SSE-NEXT: js .LBB45_4 +; SSE-NEXT: js .LBB47_4 ; SSE-NEXT: # BB#5: ; SSE-NEXT: cvtsi2ssq %rax, %xmm2 -; SSE-NEXT: jmp .LBB45_6 -; SSE-NEXT: .LBB45_4: +; SSE-NEXT: jmp .LBB47_6 +; SSE-NEXT: .LBB47_4: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax ; SSE-NEXT: orq %rcx, %rax ; SSE-NEXT: cvtsi2ssq %rax, %xmm2 ; SSE-NEXT: addss %xmm2, %xmm2 -; SSE-NEXT: .LBB45_6: +; SSE-NEXT: .LBB47_6: ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] ; SSE-NEXT: movd %xmm1, %rax ; SSE-NEXT: testq %rax, %rax -; SSE-NEXT: js .LBB45_7 +; SSE-NEXT: js .LBB47_7 ; SSE-NEXT: # BB#8: ; SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1 -; SSE-NEXT: jmp .LBB45_9 -; SSE-NEXT: .LBB45_7: +; SSE-NEXT: jmp .LBB47_9 +; SSE-NEXT: .LBB47_7: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax @@ -2040,17 +2210,17 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) { ; 
SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1 ; SSE-NEXT: addss %xmm1, %xmm1 -; SSE-NEXT: .LBB45_9: +; SSE-NEXT: .LBB47_9: ; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] ; SSE-NEXT: movd %xmm0, %rax ; SSE-NEXT: testq %rax, %rax -; SSE-NEXT: js .LBB45_10 +; SSE-NEXT: js .LBB47_10 ; SSE-NEXT: # BB#11: ; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0 -; SSE-NEXT: jmp .LBB45_12 -; SSE-NEXT: .LBB45_10: +; SSE-NEXT: jmp .LBB47_12 +; SSE-NEXT: .LBB47_10: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax @@ -2058,7 +2228,7 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) { ; SSE-NEXT: xorps %xmm0, %xmm0 ; SSE-NEXT: cvtsi2ssq %rax, %xmm0 ; SSE-NEXT: addss %xmm0, %xmm0 -; SSE-NEXT: .LBB45_12: +; SSE-NEXT: .LBB47_12: ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] ; SSE-NEXT: movaps %xmm2, %xmm0 @@ -2068,58 +2238,58 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) { ; AVX1: # BB#0: ; AVX1-NEXT: vpextrq $1, %xmm0, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB45_1 +; AVX1-NEXT: js .LBB47_1 ; AVX1-NEXT: # BB#2: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 -; AVX1-NEXT: jmp .LBB45_3 -; AVX1-NEXT: .LBB45_1: +; AVX1-NEXT: jmp .LBB47_3 +; AVX1-NEXT: .LBB47_1: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: orq %rcx, %rax ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: .LBB45_3: +; AVX1-NEXT: .LBB47_3: ; AVX1-NEXT: vmovq %xmm0, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB45_4 +; AVX1-NEXT: js .LBB47_4 ; AVX1-NEXT: # BB#5: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2 -; AVX1-NEXT: jmp .LBB45_6 -; AVX1-NEXT: .LBB45_4: +; AVX1-NEXT: jmp .LBB47_6 +; AVX1-NEXT: .LBB47_4: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: orq %rcx, %rax ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2 ; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: .LBB45_6: +; AVX1-NEXT: .LBB47_6: ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vmovq %xmm0, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB45_7 +; AVX1-NEXT: js .LBB47_7 ; AVX1-NEXT: # BB#8: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2 -; AVX1-NEXT: jmp .LBB45_9 -; AVX1-NEXT: .LBB45_7: +; AVX1-NEXT: jmp .LBB47_9 +; AVX1-NEXT: .LBB47_7: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: orq %rcx, %rax ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2 ; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: .LBB45_9: +; AVX1-NEXT: .LBB47_9: ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] ; AVX1-NEXT: vpextrq $1, %xmm0, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB45_10 +; AVX1-NEXT: js .LBB47_10 ; AVX1-NEXT: # BB#11: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq -; AVX1-NEXT: .LBB45_10: +; AVX1-NEXT: .LBB47_10: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax @@ -2134,58 +2304,58 @@ define <4 x float> @uitofp_4i64_to_4f32(<4 x i64> %a) { ; AVX2: # BB#0: ; AVX2-NEXT: vpextrq $1, %xmm0, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB45_1 +; AVX2-NEXT: js .LBB47_1 ; AVX2-NEXT: # BB#2: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 -; 
AVX2-NEXT: jmp .LBB45_3 -; AVX2-NEXT: .LBB45_1: +; AVX2-NEXT: jmp .LBB47_3 +; AVX2-NEXT: .LBB47_1: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: orq %rcx, %rax ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; AVX2-NEXT: vaddss %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: .LBB45_3: +; AVX2-NEXT: .LBB47_3: ; AVX2-NEXT: vmovq %xmm0, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB45_4 +; AVX2-NEXT: js .LBB47_4 ; AVX2-NEXT: # BB#5: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2 -; AVX2-NEXT: jmp .LBB45_6 -; AVX2-NEXT: .LBB45_4: +; AVX2-NEXT: jmp .LBB47_6 +; AVX2-NEXT: .LBB47_4: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: orq %rcx, %rax ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2 ; AVX2-NEXT: vaddss %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: .LBB45_6: +; AVX2-NEXT: .LBB47_6: ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX2-NEXT: vmovq %xmm0, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB45_7 +; AVX2-NEXT: js .LBB47_7 ; AVX2-NEXT: # BB#8: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2 -; AVX2-NEXT: jmp .LBB45_9 -; AVX2-NEXT: .LBB45_7: +; AVX2-NEXT: jmp .LBB47_9 +; AVX2-NEXT: .LBB47_7: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: orq %rcx, %rax ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2 ; AVX2-NEXT: vaddss %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: .LBB45_9: +; AVX2-NEXT: .LBB47_9: ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] ; AVX2-NEXT: vpextrq $1, %xmm0, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB45_10 +; AVX2-NEXT: js .LBB47_10 ; AVX2-NEXT: # BB#11: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq -; AVX2-NEXT: .LBB45_10: +; AVX2-NEXT: .LBB47_10: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax @@ -3686,41 +3856,41 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) { ; SSE-NEXT: movdqa 16(%rdi), %xmm3 ; SSE-NEXT: movd %xmm3, %rax ; SSE-NEXT: testq %rax, %rax -; SSE-NEXT: js .LBB74_1 +; SSE-NEXT: js .LBB76_1 ; SSE-NEXT: # BB#2: ; SSE-NEXT: cvtsi2ssq %rax, %xmm2 -; SSE-NEXT: jmp .LBB74_3 -; SSE-NEXT: .LBB74_1: +; SSE-NEXT: jmp .LBB76_3 +; SSE-NEXT: .LBB76_1: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax ; SSE-NEXT: orq %rcx, %rax ; SSE-NEXT: cvtsi2ssq %rax, %xmm2 ; SSE-NEXT: addss %xmm2, %xmm2 -; SSE-NEXT: .LBB74_3: +; SSE-NEXT: .LBB76_3: ; SSE-NEXT: movd %xmm1, %rax ; SSE-NEXT: testq %rax, %rax -; SSE-NEXT: js .LBB74_4 +; SSE-NEXT: js .LBB76_4 ; SSE-NEXT: # BB#5: ; SSE-NEXT: cvtsi2ssq %rax, %xmm0 -; SSE-NEXT: jmp .LBB74_6 -; SSE-NEXT: .LBB74_4: +; SSE-NEXT: jmp .LBB76_6 +; SSE-NEXT: .LBB76_4: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax ; SSE-NEXT: orq %rcx, %rax ; SSE-NEXT: cvtsi2ssq %rax, %xmm0 ; SSE-NEXT: addss %xmm0, %xmm0 -; SSE-NEXT: .LBB74_6: +; SSE-NEXT: .LBB76_6: ; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1] ; SSE-NEXT: movd %xmm3, %rax ; SSE-NEXT: testq %rax, %rax -; SSE-NEXT: js .LBB74_7 +; SSE-NEXT: js .LBB76_7 ; SSE-NEXT: # BB#8: ; SSE-NEXT: xorps %xmm3, %xmm3 ; SSE-NEXT: cvtsi2ssq %rax, %xmm3 -; SSE-NEXT: jmp .LBB74_9 -; SSE-NEXT: .LBB74_7: +; SSE-NEXT: jmp .LBB76_9 +; SSE-NEXT: .LBB76_7: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax @@ -3728,17 +3898,17 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) { ; SSE-NEXT: xorps 
%xmm3, %xmm3 ; SSE-NEXT: cvtsi2ssq %rax, %xmm3 ; SSE-NEXT: addss %xmm3, %xmm3 -; SSE-NEXT: .LBB74_9: +; SSE-NEXT: .LBB76_9: ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] ; SSE-NEXT: movd %xmm1, %rax ; SSE-NEXT: testq %rax, %rax -; SSE-NEXT: js .LBB74_10 +; SSE-NEXT: js .LBB76_10 ; SSE-NEXT: # BB#11: ; SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1 -; SSE-NEXT: jmp .LBB74_12 -; SSE-NEXT: .LBB74_10: +; SSE-NEXT: jmp .LBB76_12 +; SSE-NEXT: .LBB76_10: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax @@ -3746,7 +3916,7 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) { ; SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1 ; SSE-NEXT: addss %xmm1, %xmm1 -; SSE-NEXT: .LBB74_12: +; SSE-NEXT: .LBB76_12: ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE-NEXT: retq @@ -3756,58 +3926,58 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) { ; AVX1-NEXT: vmovdqa (%rdi), %ymm0 ; AVX1-NEXT: vpextrq $1, %xmm0, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB74_1 +; AVX1-NEXT: js .LBB76_1 ; AVX1-NEXT: # BB#2: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 -; AVX1-NEXT: jmp .LBB74_3 -; AVX1-NEXT: .LBB74_1: +; AVX1-NEXT: jmp .LBB76_3 +; AVX1-NEXT: .LBB76_1: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: orq %rcx, %rax ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: .LBB74_3: +; AVX1-NEXT: .LBB76_3: ; AVX1-NEXT: vmovq %xmm0, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB74_4 +; AVX1-NEXT: js .LBB76_4 ; AVX1-NEXT: # BB#5: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2 -; AVX1-NEXT: jmp .LBB74_6 -; AVX1-NEXT: .LBB74_4: +; AVX1-NEXT: jmp .LBB76_6 +; AVX1-NEXT: .LBB76_4: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: orq %rcx, %rax ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2 ; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: .LBB74_6: +; AVX1-NEXT: .LBB76_6: ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 ; AVX1-NEXT: vmovq %xmm0, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB74_7 +; AVX1-NEXT: js .LBB76_7 ; AVX1-NEXT: # BB#8: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2 -; AVX1-NEXT: jmp .LBB74_9 -; AVX1-NEXT: .LBB74_7: +; AVX1-NEXT: jmp .LBB76_9 +; AVX1-NEXT: .LBB76_7: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: orq %rcx, %rax ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2 ; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: .LBB74_9: +; AVX1-NEXT: .LBB76_9: ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] ; AVX1-NEXT: vpextrq $1, %xmm0, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB74_10 +; AVX1-NEXT: js .LBB76_10 ; AVX1-NEXT: # BB#11: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0 ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] ; AVX1-NEXT: vzeroupper ; AVX1-NEXT: retq -; AVX1-NEXT: .LBB74_10: +; AVX1-NEXT: .LBB76_10: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax @@ -3823,58 +3993,58 @@ define <4 x float> @uitofp_load_4i64_to_4f32(<4 x i64> *%a) { ; AVX2-NEXT: vmovdqa (%rdi), %ymm0 ; AVX2-NEXT: vpextrq $1, %xmm0, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB74_1 +; AVX2-NEXT: js .LBB76_1 ; AVX2-NEXT: # BB#2: ; AVX2-NEXT: vcvtsi2ssq 
%rax, %xmm1, %xmm1 -; AVX2-NEXT: jmp .LBB74_3 -; AVX2-NEXT: .LBB74_1: +; AVX2-NEXT: jmp .LBB76_3 +; AVX2-NEXT: .LBB76_1: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: orq %rcx, %rax ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; AVX2-NEXT: vaddss %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: .LBB74_3: +; AVX2-NEXT: .LBB76_3: ; AVX2-NEXT: vmovq %xmm0, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB74_4 +; AVX2-NEXT: js .LBB76_4 ; AVX2-NEXT: # BB#5: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2 -; AVX2-NEXT: jmp .LBB74_6 -; AVX2-NEXT: .LBB74_4: +; AVX2-NEXT: jmp .LBB76_6 +; AVX2-NEXT: .LBB76_4: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: orq %rcx, %rax ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm2, %xmm2 ; AVX2-NEXT: vaddss %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: .LBB74_6: +; AVX2-NEXT: .LBB76_6: ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 ; AVX2-NEXT: vmovq %xmm0, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB74_7 +; AVX2-NEXT: js .LBB76_7 ; AVX2-NEXT: # BB#8: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2 -; AVX2-NEXT: jmp .LBB74_9 -; AVX2-NEXT: .LBB74_7: +; AVX2-NEXT: jmp .LBB76_9 +; AVX2-NEXT: .LBB76_7: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: orq %rcx, %rax ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm2 ; AVX2-NEXT: vaddss %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: .LBB74_9: +; AVX2-NEXT: .LBB76_9: ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] ; AVX2-NEXT: vpextrq $1, %xmm0, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB74_10 +; AVX2-NEXT: js .LBB76_10 ; AVX2-NEXT: # BB#11: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm0 ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq -; AVX2-NEXT: .LBB74_10: +; AVX2-NEXT: .LBB76_10: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax @@ -4047,56 +4217,56 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; SSE-NEXT: movdqa 48(%rdi), %xmm3 ; SSE-NEXT: movd %xmm5, %rax ; SSE-NEXT: testq %rax, %rax -; SSE-NEXT: js .LBB78_1 +; SSE-NEXT: js .LBB80_1 ; SSE-NEXT: # BB#2: ; SSE-NEXT: cvtsi2ssq %rax, %xmm4 -; SSE-NEXT: jmp .LBB78_3 -; SSE-NEXT: .LBB78_1: +; SSE-NEXT: jmp .LBB80_3 +; SSE-NEXT: .LBB80_1: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax ; SSE-NEXT: orq %rcx, %rax ; SSE-NEXT: cvtsi2ssq %rax, %xmm4 ; SSE-NEXT: addss %xmm4, %xmm4 -; SSE-NEXT: .LBB78_3: +; SSE-NEXT: .LBB80_3: ; SSE-NEXT: movd %xmm1, %rax ; SSE-NEXT: testq %rax, %rax -; SSE-NEXT: js .LBB78_4 +; SSE-NEXT: js .LBB80_4 ; SSE-NEXT: # BB#5: ; SSE-NEXT: cvtsi2ssq %rax, %xmm0 -; SSE-NEXT: jmp .LBB78_6 -; SSE-NEXT: .LBB78_4: +; SSE-NEXT: jmp .LBB80_6 +; SSE-NEXT: .LBB80_4: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax ; SSE-NEXT: orq %rcx, %rax ; SSE-NEXT: cvtsi2ssq %rax, %xmm0 ; SSE-NEXT: addss %xmm0, %xmm0 -; SSE-NEXT: .LBB78_6: +; SSE-NEXT: .LBB80_6: ; SSE-NEXT: pshufd {{.*#+}} xmm5 = xmm5[2,3,0,1] ; SSE-NEXT: movd %xmm5, %rax ; SSE-NEXT: testq %rax, %rax -; SSE-NEXT: js .LBB78_7 +; SSE-NEXT: js .LBB80_7 ; SSE-NEXT: # BB#8: ; SSE-NEXT: cvtsi2ssq %rax, %xmm6 -; SSE-NEXT: jmp .LBB78_9 -; SSE-NEXT: .LBB78_7: +; SSE-NEXT: jmp .LBB80_9 +; SSE-NEXT: .LBB80_7: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax ; SSE-NEXT: orq %rcx, %rax ; SSE-NEXT: cvtsi2ssq %rax, %xmm6 ; SSE-NEXT: addss %xmm6, %xmm6 -; SSE-NEXT: .LBB78_9: +; 
SSE-NEXT: .LBB80_9: ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] ; SSE-NEXT: movd %xmm1, %rax ; SSE-NEXT: testq %rax, %rax -; SSE-NEXT: js .LBB78_10 +; SSE-NEXT: js .LBB80_10 ; SSE-NEXT: # BB#11: ; SSE-NEXT: xorps %xmm5, %xmm5 ; SSE-NEXT: cvtsi2ssq %rax, %xmm5 -; SSE-NEXT: jmp .LBB78_12 -; SSE-NEXT: .LBB78_10: +; SSE-NEXT: jmp .LBB80_12 +; SSE-NEXT: .LBB80_10: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax @@ -4104,29 +4274,29 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; SSE-NEXT: xorps %xmm5, %xmm5 ; SSE-NEXT: cvtsi2ssq %rax, %xmm5 ; SSE-NEXT: addss %xmm5, %xmm5 -; SSE-NEXT: .LBB78_12: +; SSE-NEXT: .LBB80_12: ; SSE-NEXT: movd %xmm3, %rax ; SSE-NEXT: testq %rax, %rax -; SSE-NEXT: js .LBB78_13 +; SSE-NEXT: js .LBB80_13 ; SSE-NEXT: # BB#14: ; SSE-NEXT: cvtsi2ssq %rax, %xmm7 -; SSE-NEXT: jmp .LBB78_15 -; SSE-NEXT: .LBB78_13: +; SSE-NEXT: jmp .LBB80_15 +; SSE-NEXT: .LBB80_13: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax ; SSE-NEXT: orq %rcx, %rax ; SSE-NEXT: cvtsi2ssq %rax, %xmm7 ; SSE-NEXT: addss %xmm7, %xmm7 -; SSE-NEXT: .LBB78_15: +; SSE-NEXT: .LBB80_15: ; SSE-NEXT: movd %xmm2, %rax ; SSE-NEXT: testq %rax, %rax -; SSE-NEXT: js .LBB78_16 +; SSE-NEXT: js .LBB80_16 ; SSE-NEXT: # BB#17: ; SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1 -; SSE-NEXT: jmp .LBB78_18 -; SSE-NEXT: .LBB78_16: +; SSE-NEXT: jmp .LBB80_18 +; SSE-NEXT: .LBB80_16: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax @@ -4134,18 +4304,18 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; SSE-NEXT: xorps %xmm1, %xmm1 ; SSE-NEXT: cvtsi2ssq %rax, %xmm1 ; SSE-NEXT: addss %xmm1, %xmm1 -; SSE-NEXT: .LBB78_18: +; SSE-NEXT: .LBB80_18: ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] ; SSE-NEXT: unpcklps {{.*#+}} xmm5 = xmm5[0],xmm6[0],xmm5[1],xmm6[1] ; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm3[2,3,0,1] ; SSE-NEXT: movd %xmm3, %rax ; SSE-NEXT: testq %rax, %rax -; SSE-NEXT: js .LBB78_19 +; SSE-NEXT: js .LBB80_19 ; SSE-NEXT: # BB#20: ; SSE-NEXT: xorps %xmm3, %xmm3 ; SSE-NEXT: cvtsi2ssq %rax, %xmm3 -; SSE-NEXT: jmp .LBB78_21 -; SSE-NEXT: .LBB78_19: +; SSE-NEXT: jmp .LBB80_21 +; SSE-NEXT: .LBB80_19: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax @@ -4153,18 +4323,18 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; SSE-NEXT: xorps %xmm3, %xmm3 ; SSE-NEXT: cvtsi2ssq %rax, %xmm3 ; SSE-NEXT: addss %xmm3, %xmm3 -; SSE-NEXT: .LBB78_21: +; SSE-NEXT: .LBB80_21: ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1] ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[1],xmm7[1] ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1] ; SSE-NEXT: movd %xmm2, %rax ; SSE-NEXT: testq %rax, %rax -; SSE-NEXT: js .LBB78_22 +; SSE-NEXT: js .LBB80_22 ; SSE-NEXT: # BB#23: ; SSE-NEXT: xorps %xmm2, %xmm2 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2 -; SSE-NEXT: jmp .LBB78_24 -; SSE-NEXT: .LBB78_22: +; SSE-NEXT: jmp .LBB80_24 +; SSE-NEXT: .LBB80_22: ; SSE-NEXT: movq %rax, %rcx ; SSE-NEXT: shrq %rcx ; SSE-NEXT: andl $1, %eax @@ -4172,7 +4342,7 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; SSE-NEXT: xorps %xmm2, %xmm2 ; SSE-NEXT: cvtsi2ssq %rax, %xmm2 ; SSE-NEXT: addss %xmm2, %xmm2 -; SSE-NEXT: .LBB78_24: +; SSE-NEXT: .LBB80_24: ; SSE-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1] ; SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] ; SSE-NEXT: retq @@ -4183,123 +4353,123 @@ define <8 x float> 
@uitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; AVX1-NEXT: vmovdqa 32(%rdi), %ymm2 ; AVX1-NEXT: vpextrq $1, %xmm2, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB78_1 +; AVX1-NEXT: js .LBB80_1 ; AVX1-NEXT: # BB#2: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 -; AVX1-NEXT: jmp .LBB78_3 -; AVX1-NEXT: .LBB78_1: +; AVX1-NEXT: jmp .LBB80_3 +; AVX1-NEXT: .LBB80_1: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: orq %rcx, %rax ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; AVX1-NEXT: vaddss %xmm1, %xmm1, %xmm1 -; AVX1-NEXT: .LBB78_3: +; AVX1-NEXT: .LBB80_3: ; AVX1-NEXT: vmovq %xmm2, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB78_4 +; AVX1-NEXT: js .LBB80_4 ; AVX1-NEXT: # BB#5: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm3 -; AVX1-NEXT: jmp .LBB78_6 -; AVX1-NEXT: .LBB78_4: +; AVX1-NEXT: jmp .LBB80_6 +; AVX1-NEXT: .LBB80_4: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: orq %rcx, %rax ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm3 ; AVX1-NEXT: vaddss %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: .LBB78_6: +; AVX1-NEXT: .LBB80_6: ; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2 ; AVX1-NEXT: vmovq %xmm2, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB78_7 +; AVX1-NEXT: js .LBB80_7 ; AVX1-NEXT: # BB#8: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm4 -; AVX1-NEXT: jmp .LBB78_9 -; AVX1-NEXT: .LBB78_7: +; AVX1-NEXT: jmp .LBB80_9 +; AVX1-NEXT: .LBB80_7: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: orq %rcx, %rax ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm4 ; AVX1-NEXT: vaddss %xmm4, %xmm4, %xmm4 -; AVX1-NEXT: .LBB78_9: +; AVX1-NEXT: .LBB80_9: ; AVX1-NEXT: vpextrq $1, %xmm2, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB78_10 +; AVX1-NEXT: js .LBB80_10 ; AVX1-NEXT: # BB#11: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm2 -; AVX1-NEXT: jmp .LBB78_12 -; AVX1-NEXT: .LBB78_10: +; AVX1-NEXT: jmp .LBB80_12 +; AVX1-NEXT: .LBB80_10: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: orq %rcx, %rax ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm2 ; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: .LBB78_12: +; AVX1-NEXT: .LBB80_12: ; AVX1-NEXT: vpextrq $1, %xmm0, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB78_13 +; AVX1-NEXT: js .LBB80_13 ; AVX1-NEXT: # BB#14: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm5 -; AVX1-NEXT: jmp .LBB78_15 -; AVX1-NEXT: .LBB78_13: +; AVX1-NEXT: jmp .LBB80_15 +; AVX1-NEXT: .LBB80_13: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: orq %rcx, %rax ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm5 ; AVX1-NEXT: vaddss %xmm5, %xmm5, %xmm5 -; AVX1-NEXT: .LBB78_15: +; AVX1-NEXT: .LBB80_15: ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[2,3] ; AVX1-NEXT: vmovq %xmm0, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB78_16 +; AVX1-NEXT: js .LBB80_16 ; AVX1-NEXT: # BB#17: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm3 -; AVX1-NEXT: jmp .LBB78_18 -; AVX1-NEXT: .LBB78_16: +; AVX1-NEXT: jmp .LBB80_18 +; AVX1-NEXT: .LBB80_16: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: orq %rcx, %rax ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm3 ; AVX1-NEXT: vaddss %xmm3, %xmm3, %xmm3 -; AVX1-NEXT: .LBB78_18: +; AVX1-NEXT: .LBB80_18: ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm4[0],xmm1[3] ; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[2,3] ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 ; AVX1-NEXT: vmovq %xmm4, %rax ; AVX1-NEXT: 
testq %rax, %rax -; AVX1-NEXT: js .LBB78_19 +; AVX1-NEXT: js .LBB80_19 ; AVX1-NEXT: # BB#20: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm5 -; AVX1-NEXT: jmp .LBB78_21 -; AVX1-NEXT: .LBB78_19: +; AVX1-NEXT: jmp .LBB80_21 +; AVX1-NEXT: .LBB80_19: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: orq %rcx, %rax ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm0 ; AVX1-NEXT: vaddss %xmm0, %xmm0, %xmm5 -; AVX1-NEXT: .LBB78_21: +; AVX1-NEXT: .LBB80_21: ; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm2[0] ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1],xmm5[0],xmm3[3] ; AVX1-NEXT: vpextrq $1, %xmm4, %rax ; AVX1-NEXT: testq %rax, %rax -; AVX1-NEXT: js .LBB78_22 +; AVX1-NEXT: js .LBB80_22 ; AVX1-NEXT: # BB#23: ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm2 -; AVX1-NEXT: jmp .LBB78_24 -; AVX1-NEXT: .LBB78_22: +; AVX1-NEXT: jmp .LBB80_24 +; AVX1-NEXT: .LBB80_22: ; AVX1-NEXT: movq %rax, %rcx ; AVX1-NEXT: shrq %rcx ; AVX1-NEXT: andl $1, %eax ; AVX1-NEXT: orq %rcx, %rax ; AVX1-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm2 ; AVX1-NEXT: vaddss %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: .LBB78_24: +; AVX1-NEXT: .LBB80_24: ; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX1-NEXT: retq @@ -4310,123 +4480,123 @@ define <8 x float> @uitofp_load_8i64_to_8f32(<8 x i64> *%a) { ; AVX2-NEXT: vmovdqa 32(%rdi), %ymm2 ; AVX2-NEXT: vpextrq $1, %xmm2, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB78_1 +; AVX2-NEXT: js .LBB80_1 ; AVX2-NEXT: # BB#2: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 -; AVX2-NEXT: jmp .LBB78_3 -; AVX2-NEXT: .LBB78_1: +; AVX2-NEXT: jmp .LBB80_3 +; AVX2-NEXT: .LBB80_1: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: orq %rcx, %rax ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1 ; AVX2-NEXT: vaddss %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: .LBB78_3: +; AVX2-NEXT: .LBB80_3: ; AVX2-NEXT: vmovq %xmm2, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB78_4 +; AVX2-NEXT: js .LBB80_4 ; AVX2-NEXT: # BB#5: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm3 -; AVX2-NEXT: jmp .LBB78_6 -; AVX2-NEXT: .LBB78_4: +; AVX2-NEXT: jmp .LBB80_6 +; AVX2-NEXT: .LBB80_4: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: orq %rcx, %rax ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm3, %xmm3 ; AVX2-NEXT: vaddss %xmm3, %xmm3, %xmm3 -; AVX2-NEXT: .LBB78_6: +; AVX2-NEXT: .LBB80_6: ; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2 ; AVX2-NEXT: vmovq %xmm2, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB78_7 +; AVX2-NEXT: js .LBB80_7 ; AVX2-NEXT: # BB#8: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm4 -; AVX2-NEXT: jmp .LBB78_9 -; AVX2-NEXT: .LBB78_7: +; AVX2-NEXT: jmp .LBB80_9 +; AVX2-NEXT: .LBB80_7: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: orq %rcx, %rax ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm4, %xmm4 ; AVX2-NEXT: vaddss %xmm4, %xmm4, %xmm4 -; AVX2-NEXT: .LBB78_9: +; AVX2-NEXT: .LBB80_9: ; AVX2-NEXT: vpextrq $1, %xmm2, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB78_10 +; AVX2-NEXT: js .LBB80_10 ; AVX2-NEXT: # BB#11: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm2 -; AVX2-NEXT: jmp .LBB78_12 -; AVX2-NEXT: .LBB78_10: +; AVX2-NEXT: jmp .LBB80_12 +; AVX2-NEXT: .LBB80_10: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: orq %rcx, %rax ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm2 ; AVX2-NEXT: vaddss %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: .LBB78_12: +; AVX2-NEXT: .LBB80_12: ; AVX2-NEXT: vpextrq $1, %xmm0, 
%rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB78_13 +; AVX2-NEXT: js .LBB80_13 ; AVX2-NEXT: # BB#14: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm5 -; AVX2-NEXT: jmp .LBB78_15 -; AVX2-NEXT: .LBB78_13: +; AVX2-NEXT: jmp .LBB80_15 +; AVX2-NEXT: .LBB80_13: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: orq %rcx, %rax ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm5, %xmm5 ; AVX2-NEXT: vaddss %xmm5, %xmm5, %xmm5 -; AVX2-NEXT: .LBB78_15: +; AVX2-NEXT: .LBB80_15: ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[2,3] ; AVX2-NEXT: vmovq %xmm0, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB78_16 +; AVX2-NEXT: js .LBB80_16 ; AVX2-NEXT: # BB#17: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm3 -; AVX2-NEXT: jmp .LBB78_18 -; AVX2-NEXT: .LBB78_16: +; AVX2-NEXT: jmp .LBB80_18 +; AVX2-NEXT: .LBB80_16: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: orq %rcx, %rax ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm3 ; AVX2-NEXT: vaddss %xmm3, %xmm3, %xmm3 -; AVX2-NEXT: .LBB78_18: +; AVX2-NEXT: .LBB80_18: ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm4[0],xmm1[3] ; AVX2-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[2,3] ; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm4 ; AVX2-NEXT: vmovq %xmm4, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB78_19 +; AVX2-NEXT: js .LBB80_19 ; AVX2-NEXT: # BB#20: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm5 -; AVX2-NEXT: jmp .LBB78_21 -; AVX2-NEXT: .LBB78_19: +; AVX2-NEXT: jmp .LBB80_21 +; AVX2-NEXT: .LBB80_19: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: orq %rcx, %rax ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm0 ; AVX2-NEXT: vaddss %xmm0, %xmm0, %xmm5 -; AVX2-NEXT: .LBB78_21: +; AVX2-NEXT: .LBB80_21: ; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm2[0] ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0,1],xmm5[0],xmm3[3] ; AVX2-NEXT: vpextrq $1, %xmm4, %rax ; AVX2-NEXT: testq %rax, %rax -; AVX2-NEXT: js .LBB78_22 +; AVX2-NEXT: js .LBB80_22 ; AVX2-NEXT: # BB#23: ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm2 -; AVX2-NEXT: jmp .LBB78_24 -; AVX2-NEXT: .LBB78_22: +; AVX2-NEXT: jmp .LBB80_24 +; AVX2-NEXT: .LBB80_22: ; AVX2-NEXT: movq %rax, %rcx ; AVX2-NEXT: shrq %rcx ; AVX2-NEXT: andl $1, %eax ; AVX2-NEXT: orq %rcx, %rax ; AVX2-NEXT: vcvtsi2ssq %rax, %xmm6, %xmm2 ; AVX2-NEXT: vaddss %xmm2, %xmm2, %xmm2 -; AVX2-NEXT: .LBB78_24: +; AVX2-NEXT: .LBB80_24: ; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] ; AVX2-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 ; AVX2-NEXT: retq -- 2.50.1