From: Sanjay Patel Date: Wed, 19 Dec 2018 20:35:28 +0000 (+0000) Subject: [x86] add test to show ddup hole; NFC (PR37502) X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=34890f3f7b76051877778fff13d2678314c31759;p=llvm [x86] add test to show ddup hole; NFC (PR37502) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@349680 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/CodeGen/X86/build-vector-128.ll b/test/CodeGen/X86/build-vector-128.ll index 6c0c2d30c31..9702e4be22a 100644 --- a/test/CodeGen/X86/build-vector-128.ll +++ b/test/CodeGen/X86/build-vector-128.ll @@ -507,3 +507,68 @@ define <4 x i32> @test_buildvector_v4i32_splat_zext_i8(i8 %in) { %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat } + +; PR37502 - https://bugs.llvm.org/show_bug.cgi?id=37502 +; Don't use a series of insertps when movddup will do. + +define <4 x float> @PR37502(float %x, float %y) { +; SSE2-32-LABEL: PR37502: +; SSE2-32: # %bb.0: +; SSE2-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE2-32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] +; SSE2-32-NEXT: retl +; +; SSE2-64-LABEL: PR37502: +; SSE2-64: # %bb.0: +; SSE2-64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; SSE2-64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] +; SSE2-64-NEXT: retq +; +; SSE41-32-LABEL: PR37502: +; SSE41-32: # %bb.0: +; SSE41-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE41-32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] +; SSE41-32-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] +; SSE41-32-NEXT: retl +; +; SSE41-64-LABEL: PR37502: +; SSE41-64: # %bb.0: +; SSE41-64-NEXT: movaps %xmm0, %xmm2 +; SSE41-64-NEXT: insertps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[2,3] +; SSE41-64-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1],xmm0[0],xmm2[3] +; SSE41-64-NEXT: insertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm1[0] +; SSE41-64-NEXT: movaps %xmm2, %xmm0 +; SSE41-64-NEXT: retq +; +; AVX1-32-LABEL: PR37502: +; AVX1-32: # %bb.0: +; AVX1-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX1-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] +; AVX1-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] +; AVX1-32-NEXT: retl +; +; AVX1-64-LABEL: PR37502: +; AVX1-64: # %bb.0: +; AVX1-64-NEXT: vinsertps {{.*#+}} xmm2 = xmm0[0],xmm1[0],xmm0[2,3] +; AVX1-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1],xmm0[0],xmm2[3] +; AVX1-64-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; AVX1-64-NEXT: retq +; +; AVX2-32-LABEL: PR37502: +; AVX2-32: # %bb.0: +; AVX2-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX2-32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] +; AVX2-32-NEXT: retl +; +; AVX2-64-LABEL: PR37502: +; AVX2-64: # %bb.0: +; AVX2-64-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX2-64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] +; AVX2-64-NEXT: retq + %i0 = insertelement <4 x float> undef, float %x, i32 0 + %i1 = insertelement <4 x float> %i0, float %y, i32 1 + %i2 = insertelement <4 x float> %i1, float %x, i32 2 + %i3 = insertelement <4 x float> %i2, float %y, i32 3 + ret <4 x float> %i3 +} +