From: Simon Pilgrim Date: Wed, 10 May 2017 15:52:59 +0000 (+0000) Subject: [X86][SSE] Check vec_set BUILD_VECTOR tests on both 32 and 64-bit targets X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=22b377c4f52f0d3023bad048e2da839aa989a31c;p=llvm [X86][SSE] Check vec_set BUILD_VECTOR tests on both 32 and 64-bit targets git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@302683 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/CodeGen/X86/vec_set-2.ll b/test/CodeGen/X86/vec_set-2.ll index 443264cdffd..51c8b211110 100644 --- a/test/CodeGen/X86/vec_set-2.ll +++ b/test/CodeGen/X86/vec_set-2.ll @@ -1,11 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2,-sse4.1 | FileCheck %s +; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2,-sse4.1 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,-sse4.1 | FileCheck %s --check-prefix=X64 define <4 x float> @test1(float %a) nounwind { -; CHECK-LABEL: test1: -; CHECK: # BB#0: -; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: retl +; X86-LABEL: test1: +; X86: # BB#0: +; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: retl +; +; X64-LABEL: test1: +; X64: # BB#0: +; X64-NEXT: xorps %xmm1, %xmm1 +; X64-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3] +; X64-NEXT: movaps %xmm1, %xmm0 +; X64-NEXT: retq %tmp = insertelement <4 x float> zeroinitializer, float %a, i32 0 %tmp5 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1 %tmp6 = insertelement <4 x float> %tmp5, float 0.000000e+00, i32 2 @@ -14,10 +22,15 @@ define <4 x float> @test1(float %a) nounwind { } define <2 x i64> @test(i32 %a) nounwind { -; CHECK-LABEL: test: -; CHECK: # BB#0: -; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: retl +; X86-LABEL: test: +; X86: # BB#0: +; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: retl +; +; 
X64-LABEL: test: +; X64: # BB#0: +; X64-NEXT: movd %edi, %xmm0 +; X64-NEXT: retq %tmp = insertelement <4 x i32> zeroinitializer, i32 %a, i32 0 %tmp6 = insertelement <4 x i32> %tmp, i32 0, i32 1 %tmp8 = insertelement <4 x i32> %tmp6, i32 0, i32 2 diff --git a/test/CodeGen/X86/vec_set-3.ll b/test/CodeGen/X86/vec_set-3.ll index ee4a0859996..b34f30924a8 100644 --- a/test/CodeGen/X86/vec_set-3.ll +++ b/test/CodeGen/X86/vec_set-3.ll @@ -1,11 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2,+sse4.1 | FileCheck %s +; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2,+sse4.1 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+sse4.1 | FileCheck %s --check-prefix=X64 define <4 x float> @test(float %a) { -; CHECK-LABEL: test: -; CHECK: # BB#0: -; CHECK-NEXT: insertps {{.*#+}} xmm0 = zero,mem[0],zero,zero -; CHECK-NEXT: retl +; X86-LABEL: test: +; X86: # BB#0: +; X86-NEXT: insertps {{.*#+}} xmm0 = zero,mem[0],zero,zero +; X86-NEXT: retl +; +; X64-LABEL: test: +; X64: # BB#0: +; X64-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero +; X64-NEXT: retq %tmp = insertelement <4 x float> zeroinitializer, float %a, i32 1 %tmp5 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 2 %tmp6 = insertelement <4 x float> %tmp5, float 0.000000e+00, i32 3 @@ -13,11 +19,17 @@ define <4 x float> @test(float %a) { } define <2 x i64> @test2(i32 %a) { -; CHECK-LABEL: test2: -; CHECK: # BB#0: -; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,0,1] -; CHECK-NEXT: retl +; X86-LABEL: test2: +; X86: # BB#0: +; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,0,1] +; X86-NEXT: retl +; +; X64-LABEL: test2: +; X64: # BB#0: +; X64-NEXT: movd %edi, %xmm0 +; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,0,1] +; X64-NEXT: retq %tmp7 = insertelement <4 x i32> zeroinitializer, i32 
%a, i32 2 %tmp9 = insertelement <4 x i32> %tmp7, i32 0, i32 3 %tmp10 = bitcast <4 x i32> %tmp9 to <2 x i64> @@ -25,10 +37,15 @@ define <2 x i64> @test2(i32 %a) { } define <4 x float> @test3(<4 x float> %A) { -; CHECK-LABEL: test3: -; CHECK: # BB#0: -; CHECK-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero -; CHECK-NEXT: retl +; X86-LABEL: test3: +; X86: # BB#0: +; X86-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero +; X86-NEXT: retl +; +; X64-LABEL: test3: +; X64: # BB#0: +; X64-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero +; X64-NEXT: retq %tmp0 = extractelement <4 x float> %A, i32 0 %tmp1 = insertelement <4 x float> <float 0.000000e+00, float undef, float undef, float undef >, float %tmp0, i32 1 %tmp2 = insertelement <4 x float> %tmp1, float 0.000000e+00, i32 2 diff --git a/test/CodeGen/X86/vec_set-4.ll b/test/CodeGen/X86/vec_set-4.ll index 8f35529d61b..09142e16aa6 100644 --- a/test/CodeGen/X86/vec_set-4.ll +++ b/test/CodeGen/X86/vec_set-4.ll @@ -1,12 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2 | FileCheck %s +; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64 define <2 x i64> @test(i16 %a) nounwind { -; CHECK-LABEL: test: -; CHECK: # BB#0: -; CHECK-NEXT: pxor %xmm0, %xmm0 -; CHECK-NEXT: pinsrw $3, {{[0-9]+}}(%esp), %xmm0 -; CHECK-NEXT: retl +; X86-LABEL: test: +; X86: # BB#0: +; X86-NEXT: pxor %xmm0, %xmm0 +; X86-NEXT: pinsrw $3, {{[0-9]+}}(%esp), %xmm0 +; X86-NEXT: retl +; +; X64-LABEL: test: +; X64: # BB#0: +; X64-NEXT: pxor %xmm0, %xmm0 +; X64-NEXT: pinsrw $3, %edi, %xmm0 +; X64-NEXT: retq %tmp10 = insertelement <8 x i16> zeroinitializer, i16 %a, i32 3 %tmp12 = insertelement <8 x i16> %tmp10, i16 0, i32 4 %tmp14 = insertelement <8 x i16> %tmp12, i16 0, i32 5 @@ -17,12 +24,19 @@ define <2 x i64> @test(i16 %a) nounwind { } define <2 x i64> @test2(i8 %a) nounwind { -; CHECK-LABEL: test2: 
-; CHECK: # BB#0: -; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: pxor %xmm0, %xmm0 -; CHECK-NEXT: pinsrw $5, %eax, %xmm0 -; CHECK-NEXT: retl +; X86-LABEL: test2: +; X86: # BB#0: +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: pxor %xmm0, %xmm0 +; X86-NEXT: pinsrw $5, %eax, %xmm0 +; X86-NEXT: retl +; +; X64-LABEL: test2: +; X64: # BB#0: +; X64-NEXT: movzbl %dil, %eax +; X64-NEXT: pxor %xmm0, %xmm0 +; X64-NEXT: pinsrw $5, %eax, %xmm0 +; X64-NEXT: retq %tmp24 = insertelement <16 x i8> zeroinitializer, i8 %a, i32 10 %tmp26 = insertelement <16 x i8> %tmp24, i8 0, i32 11 %tmp28 = insertelement <16 x i8> %tmp26, i8 0, i32 12 diff --git a/test/CodeGen/X86/vec_set-6.ll b/test/CodeGen/X86/vec_set-6.ll index 4429834b8ef..3c9aca3a02d 100644 --- a/test/CodeGen/X86/vec_set-6.ll +++ b/test/CodeGen/X86/vec_set-6.ll @@ -1,13 +1,22 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2,+sse4.1 | FileCheck %s +; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2,+sse4.1 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,+sse4.1 | FileCheck %s --check-prefix=X64 define <4 x float> @test(float %a, float %b, float %c) nounwind { -; CHECK-LABEL: test: -; CHECK: # BB#0: -; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,1] -; CHECK-NEXT: retl +; X86-LABEL: test: +; X86: # BB#0: +; X86-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,1] +; X86-NEXT: retl +; +; X64-LABEL: test: +; X64: # BB#0: +; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X64-NEXT: xorps %xmm2, %xmm2 +; X64-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3] +; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,0],xmm1[0,1] +; X64-NEXT: retq %tmp = insertelement <4 x 
float> zeroinitializer, float %a, i32 1 %tmp8 = insertelement <4 x float> %tmp, float %b, i32 2 %tmp10 = insertelement <4 x float> %tmp8, float %c, i32 3 diff --git a/test/CodeGen/X86/vec_set-7.ll b/test/CodeGen/X86/vec_set-7.ll index e8fe6debb14..757a0d44cd4 100644 --- a/test/CodeGen/X86/vec_set-7.ll +++ b/test/CodeGen/X86/vec_set-7.ll @@ -1,12 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2 | FileCheck %s +; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64 define <2 x i64> @test(<2 x i64>* %p) nounwind { -; CHECK-LABEL: test: -; CHECK: # BB#0: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: retl +; X86-LABEL: test: +; X86: # BB#0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-NEXT: retl +; +; X64-LABEL: test: +; X64: # BB#0: +; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X64-NEXT: retq %tmp = bitcast <2 x i64>* %p to double* %tmp.upgrd.1 = load double, double* %tmp %tmp.upgrd.2 = insertelement <2 x double> undef, double %tmp.upgrd.1, i32 0 diff --git a/test/CodeGen/X86/vec_set-8.ll b/test/CodeGen/X86/vec_set-8.ll index 7a4326c01bb..a9dceb90855 100644 --- a/test/CodeGen/X86/vec_set-8.ll +++ b/test/CodeGen/X86/vec_set-8.ll @@ -1,11 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s +; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X64 define <2 x i64> @test(i64 %i) nounwind { -; CHECK-LABEL: test: -; CHECK: # BB#0: -; CHECK-NEXT: movq %rdi, %xmm0 -; CHECK-NEXT: retq +; X86-LABEL: test: +; X86: # BB#0: +; X86-NEXT: movsd 
{{.*#+}} xmm0 = mem[0],zero +; X86-NEXT: retl +; +; X64-LABEL: test: +; X64: # BB#0: +; X64-NEXT: movq %rdi, %xmm0 +; X64-NEXT: retq %tmp10 = insertelement <2 x i64> undef, i64 %i, i32 0 %tmp11 = insertelement <2 x i64> %tmp10, i64 0, i32 1 ret <2 x i64> %tmp11 diff --git a/test/CodeGen/X86/vec_set-A.ll b/test/CodeGen/X86/vec_set-A.ll index cae39a3d775..259ace98d36 100644 --- a/test/CodeGen/X86/vec_set-A.ll +++ b/test/CodeGen/X86/vec_set-A.ll @@ -1,12 +1,19 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2 | FileCheck %s +; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64 define <2 x i64> @test1() nounwind { -; CHECK-LABEL: test1: -; CHECK: # BB#0: -; CHECK-NEXT: movl $1, %eax -; CHECK-NEXT: movd %eax, %xmm0 -; CHECK-NEXT: retl +; X86-LABEL: test1: +; X86: # BB#0: +; X86-NEXT: movl $1, %eax +; X86-NEXT: movd %eax, %xmm0 +; X86-NEXT: retl +; +; X64-LABEL: test1: +; X64: # BB#0: +; X64-NEXT: movl $1, %eax +; X64-NEXT: movq %rax, %xmm0 +; X64-NEXT: retq ret <2 x i64> < i64 1, i64 0 > } diff --git a/test/CodeGen/X86/vec_set-B.ll b/test/CodeGen/X86/vec_set-B.ll index 0580a337665..ecd9b57cfd0 100644 --- a/test/CodeGen/X86/vec_set-B.ll +++ b/test/CodeGen/X86/vec_set-B.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2 | FileCheck %s +; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64 ; These should both generate something like this: ;_test3: @@ -9,26 +10,37 @@ ; ret define <2 x i64> @test3(i64 %arg) nounwind { -; CHECK-LABEL: test3: -; CHECK: # BB#0: -; CHECK-NEXT: movl $1234567, %eax # imm = 0x12D687 -; CHECK-NEXT: andl {{[0-9]+}}(%esp), %eax -; 
CHECK-NEXT: movd %eax, %xmm0 -; CHECK-NEXT: retl +; X86-LABEL: test3: +; X86: # BB#0: +; X86-NEXT: movl $1234567, %eax # imm = 0x12D687 +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movd %eax, %xmm0 +; X86-NEXT: retl +; +; X64-LABEL: test3: +; X64: # BB#0: +; X64-NEXT: andl $1234567, %edi # imm = 0x12D687 +; X64-NEXT: movq %rdi, %xmm0 +; X64-NEXT: retq %A = and i64 %arg, 1234567 %B = insertelement <2 x i64> zeroinitializer, i64 %A, i32 0 ret <2 x i64> %B } define <2 x i64> @test2(i64 %arg) nounwind { -; CHECK-LABEL: test2: -; CHECK: # BB#0: -; CHECK-NEXT: movl $1234567, %eax # imm = 0x12D687 -; CHECK-NEXT: andl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movd %eax, %xmm0 -; CHECK-NEXT: retl +; X86-LABEL: test2: +; X86: # BB#0: +; X86-NEXT: movl $1234567, %eax # imm = 0x12D687 +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movd %eax, %xmm0 +; X86-NEXT: retl +; +; X64-LABEL: test2: +; X64: # BB#0: +; X64-NEXT: andl $1234567, %edi # imm = 0x12D687 +; X64-NEXT: movq %rdi, %xmm0 +; X64-NEXT: retq %A = and i64 %arg, 1234567 %B = insertelement <2 x i64> undef, i64 %A, i32 0 ret <2 x i64> %B } - diff --git a/test/CodeGen/X86/vec_set-C.ll b/test/CodeGen/X86/vec_set-C.ll index 994bc2b3056..865e2fb83f1 100644 --- a/test/CodeGen/X86/vec_set-C.ll +++ b/test/CodeGen/X86/vec_set-C.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i386-linux-gnu -mattr=+sse2,-avx | FileCheck %s --check-prefix=X32 +; RUN: llc < %s -mtriple=i386-linux-gnu -mattr=+sse2,-avx | FileCheck %s --check-prefix=X86 ; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+sse2,-avx | FileCheck %s --check-prefix=X64 define <2 x i64> @t1(i64 %x) nounwind { -; X32-LABEL: t1: -; X32: # BB#0: -; X32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X32-NEXT: retl +; X86-LABEL: t1: +; X86: # BB#0: +; X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; X86-NEXT: retl ; ; X64-LABEL: t1: ; X64: # BB#0: diff --git a/test/CodeGen/X86/vec_set.ll 
b/test/CodeGen/X86/vec_set.ll index 49bd3beef75..6439a6dcb00 100644 --- a/test/CodeGen/X86/vec_set.ll +++ b/test/CodeGen/X86/vec_set.ll @@ -1,27 +1,48 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2,-sse4.1 | FileCheck %s +; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2,-sse4.1 | FileCheck %s --check-prefix=X86 +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2,-sse4.1 | FileCheck %s --check-prefix=X64 define void @test(<8 x i16>* %b, i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind { -; CHECK-LABEL: test: -; CHECK: # BB#0: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; CHECK-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; CHECK-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero -; CHECK-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3] -; CHECK-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] -; CHECK-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; CHECK-NEXT: movdqa %xmm3, (%eax) -; CHECK-NEXT: retl +; X86-LABEL: test: +; X86: # BB#0: +; X86-NEXT: 
movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X86-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; X86-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X86-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X86-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X86-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero +; X86-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3] +; X86-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] +; X86-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; X86-NEXT: movdqa %xmm3, (%eax) +; X86-NEXT: retl +; +; X64-LABEL: test: +; X64: # BB#0: +; X64-NEXT: movd %r8d, %xmm0 +; X64-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] +; X64-NEXT: movd %edx, %xmm1 +; X64-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] +; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-NEXT: movd %ecx, %xmm0 +; X64-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; X64-NEXT: movd %r9d, %xmm2 +; 
X64-NEXT: movd %esi, %xmm3 +; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3] +; X64-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3] +; X64-NEXT: movdqa %xmm3, (%rdi) +; X64-NEXT: retq %tmp = insertelement <8 x i16> zeroinitializer, i16 %a0, i32 0 %tmp2 = insertelement <8 x i16> %tmp, i16 %a1, i32 1 %tmp4 = insertelement <8 x i16> %tmp2, i16 %a2, i32 2