From: Elena Demikhovsky Date: Wed, 4 May 2016 14:31:18 +0000 (+0000) Subject: The test files are auto-generated by update_llc_test_checks.py utility. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=d14cbec643b733b19d37fb69e8d7c5bafc79f4f5;p=llvm The test files are auto-generated by update_llc_test_checks.py utility. No functional changes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268498 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/CodeGen/X86/sse-intrinsics-x86.ll b/test/CodeGen/X86/sse-intrinsics-x86.ll index 0857189be73..0449b2aa942 100644 --- a/test/CodeGen/X86/sse-intrinsics-x86.ll +++ b/test/CodeGen/X86/sse-intrinsics-x86.ll @@ -1,8 +1,17 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse | FileCheck %s -; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s +; NOTE: Assertions have been autogenerated by update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse | FileCheck %s --check-prefix=SSE +; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL define <4 x float> @test_x86_sse_add_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: addss +; SSE-LABEL: test_x86_sse_add_ss: +; SSE: ## BB#0: +; SSE-NEXT: addss %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_add_ss: +; KNL: ## BB#0: +; KNL-NEXT: vaddss %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -10,7 +19,15 @@ declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>) nounwind read define <4 x float> @test_x86_sse_cmp_ps(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: cmpordps +; SSE-LABEL: test_x86_sse_cmp_ps: +; SSE: ## BB#0: +; SSE-NEXT: cmpordps %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_cmp_ps: +; KNL: ## BB#0: +; KNL-NEXT: vcmpordps %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -18,7 +35,15 @@ declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind define <4 x float> @test_x86_sse_cmp_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: cmpordss +; SSE-LABEL: test_x86_sse_cmp_ss: +; SSE: ## BB#0: +; SSE-NEXT: cmpordss %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_cmp_ss: +; KNL: ## BB#0: +; KNL-NEXT: vcmpordss %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -26,9 +51,19 @@ declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind define i32 @test_x86_sse_comieq_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: comiss - ; CHECK: sete - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse_comieq_ss: +; SSE: ## BB#0: +; SSE-NEXT: comiss %xmm1, %xmm0 +; SSE-NEXT: sete %al +; SSE-NEXT: movzbl %al, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_comieq_ss: +; KNL: ## BB#0: +; KNL-NEXT: vcomiss %xmm1, %xmm0 +; KNL-NEXT: sete %al +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res } @@ -36,9 +71,19 @@ declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: comiss - ; CHECK: setae - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse_comige_ss: +; SSE: ## BB#0: +; SSE-NEXT: comiss %xmm1, %xmm0 +; SSE-NEXT: setae %al +; SSE-NEXT: movzbl %al, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_comige_ss: +; KNL: ## BB#0: +; KNL-NEXT: vcomiss %xmm1, %xmm0 +; KNL-NEXT: setae %al +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res } @@ -46,9 +91,19 @@ declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: comiss - ; CHECK: seta - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse_comigt_ss: +; SSE: ## BB#0: +; SSE-NEXT: comiss %xmm1, %xmm0 +; SSE-NEXT: seta %al +; SSE-NEXT: movzbl %al, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_comigt_ss: +; KNL: ## BB#0: +; KNL-NEXT: vcomiss %xmm1, %xmm0 +; KNL-NEXT: seta %al +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res } @@ -56,9 +111,19 @@ declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: comiss - ; CHECK: setbe - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse_comile_ss: +; SSE: ## BB#0: +; SSE-NEXT: comiss %xmm1, %xmm0 +; SSE-NEXT: setbe %al +; SSE-NEXT: movzbl %al, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_comile_ss: +; KNL: ## BB#0: +; KNL-NEXT: vcomiss %xmm1, %xmm0 +; KNL-NEXT: setbe %al +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res } @@ -66,8 +131,19 @@ declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: comiss - ; CHECK: sbb +; SSE-LABEL: test_x86_sse_comilt_ss: +; SSE: ## BB#0: +; SSE-NEXT: comiss %xmm1, %xmm0 +; SSE-NEXT: sbbl %eax, %eax +; SSE-NEXT: andl $1, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_comilt_ss: +; KNL: ## BB#0: +; KNL-NEXT: vcomiss %xmm1, %xmm0 +; KNL-NEXT: sbbl %eax, %eax +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res } @@ -75,9 +151,19 @@ declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_comineq_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: comiss - ; CHECK: setne - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse_comineq_ss: +; SSE: ## BB#0: +; SSE-NEXT: comiss %xmm1, %xmm0 +; SSE-NEXT: setne %al +; SSE-NEXT: movzbl %al, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_comineq_ss: +; KNL: ## BB#0: +; KNL-NEXT: vcomiss %xmm1, %xmm0 +; KNL-NEXT: setne %al +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res } @@ -85,8 +171,17 @@ declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone define <4 x float> @test_x86_sse_cvtsi2ss(<4 x float> %a0) { - ; CHECK: movl - ; CHECK: cvtsi2ss +; SSE-LABEL: test_x86_sse_cvtsi2ss: +; SSE: ## BB#0: +; SSE-NEXT: movl $7, %eax +; SSE-NEXT: cvtsi2ssl %eax, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_cvtsi2ss: +; KNL: ## BB#0: +; KNL-NEXT: movl $7, %eax +; KNL-NEXT: vcvtsi2ssl %eax, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -94,7 +189,15 @@ declare <4 x float> @llvm.x86.sse.cvtsi2ss(<4 x float>, i32) nounwind readnone define i32 @test_x86_sse_cvtss2si(<4 x float> %a0) { - ; CHECK: cvtss2si +; SSE-LABEL: test_x86_sse_cvtss2si: +; SSE: ## BB#0: +; SSE-NEXT: cvtss2si %xmm0, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_cvtss2si: +; KNL: ## BB#0: +; KNL-NEXT: vcvtss2si %xmm0, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) ; [#uses=1] ret i32 %res } @@ -102,7 +205,15 @@ declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone define i32 @test_x86_sse_cvttss2si(<4 x float> %a0) { - ; CHECK: cvttss2si +; SSE-LABEL: test_x86_sse_cvttss2si: +; SSE: ## BB#0: +; SSE-NEXT: cvttss2si %xmm0, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_cvttss2si: +; KNL: ## BB#0: +; KNL-NEXT: vcvttss2si %xmm0, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.cvttss2si(<4 x float> %a0) ; [#uses=1] ret i32 %res } @@ -110,7 +221,15 @@ declare i32 @llvm.x86.sse.cvttss2si(<4 x float>) nounwind readnone define <4 x float> @test_x86_sse_div_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: divss +; SSE-LABEL: test_x86_sse_div_ss: +; SSE: ## BB#0: +; SSE-NEXT: divss %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_div_ss: +; KNL: ## BB#0: +; KNL-NEXT: vdivss %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -118,8 +237,17 @@ declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>) nounwind read define void @test_x86_sse_ldmxcsr(i8* %a0) { - ; CHECK: movl - ; CHECK: ldmxcsr +; SSE-LABEL: test_x86_sse_ldmxcsr: +; SSE: ## BB#0: +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: ldmxcsr (%eax) +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_ldmxcsr: +; KNL: ## BB#0: +; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax +; KNL-NEXT: vldmxcsr (%eax) +; KNL-NEXT: retl call void @llvm.x86.sse.ldmxcsr(i8* %a0) ret void } @@ -128,7 +256,15 @@ declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind define <4 x float> @test_x86_sse_max_ps(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: maxps +; SSE-LABEL: test_x86_sse_max_ps: +; SSE: ## BB#0: +; SSE-NEXT: maxps %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_max_ps: +; KNL: ## BB#0: +; KNL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -136,7 +272,15 @@ declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind read define <4 x float> @test_x86_sse_max_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: maxss +; SSE-LABEL: test_x86_sse_max_ss: +; SSE: ## BB#0: +; SSE-NEXT: maxss %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_max_ss: +; KNL: ## BB#0: +; KNL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -144,7 +288,15 @@ declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind read define <4 x float> @test_x86_sse_min_ps(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: minps +; SSE-LABEL: test_x86_sse_min_ps: +; SSE: ## BB#0: +; SSE-NEXT: minps %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_min_ps: +; KNL: ## BB#0: +; KNL-NEXT: vminps %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -152,7 +304,15 @@ declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind read define <4 x float> @test_x86_sse_min_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: minss +; SSE-LABEL: test_x86_sse_min_ss: +; SSE: ## BB#0: +; SSE-NEXT: minss %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_min_ss: +; KNL: ## BB#0: +; KNL-NEXT: vminss %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -160,7 +320,15 @@ declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind read define i32 @test_x86_sse_movmsk_ps(<4 x float> %a0) { - ; CHECK: movmskps +; SSE-LABEL: test_x86_sse_movmsk_ps: +; SSE: ## BB#0: +; SSE-NEXT: movmskps %xmm0, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_movmsk_ps: +; KNL: ## BB#0: +; KNL-NEXT: vmovmskps %xmm0, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0) ; [#uses=1] ret i32 %res } @@ -169,7 +337,15 @@ declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone define <4 x float> @test_x86_sse_mul_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: mulss +; SSE-LABEL: test_x86_sse_mul_ss: +; SSE: ## BB#0: +; SSE-NEXT: mulss %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_mul_ss: +; KNL: ## BB#0: +; KNL-NEXT: vmulss %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -177,7 +353,15 @@ declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>) nounwind read define <4 x float> @test_x86_sse_rcp_ps(<4 x float> %a0) { - ; CHECK: rcpps +; SSE-LABEL: test_x86_sse_rcp_ps: +; SSE: ## BB#0: +; SSE-NEXT: rcpps %xmm0, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_rcp_ps: +; KNL: ## BB#0: +; KNL-NEXT: vrcpps %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -185,7 +369,15 @@ declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone define <4 x float> @test_x86_sse_rcp_ss(<4 x float> %a0) { - ; CHECK: rcpss +; SSE-LABEL: test_x86_sse_rcp_ss: +; SSE: ## BB#0: +; SSE-NEXT: rcpss %xmm0, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_rcp_ss: +; KNL: ## BB#0: +; KNL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -193,7 +385,15 @@ declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone define <4 x float> @test_x86_sse_rsqrt_ps(<4 x float> %a0) { - ; CHECK: rsqrtps +; SSE-LABEL: test_x86_sse_rsqrt_ps: +; SSE: ## BB#0: +; SSE-NEXT: rsqrtps %xmm0, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_rsqrt_ps: +; KNL: ## BB#0: +; KNL-NEXT: vrsqrtps %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -201,7 +401,15 @@ declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone define <4 x float> @test_x86_sse_rsqrt_ss(<4 x float> %a0) { - ; CHECK: rsqrtss +; SSE-LABEL: test_x86_sse_rsqrt_ss: +; SSE: ## BB#0: +; SSE-NEXT: rsqrtss %xmm0, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_rsqrt_ss: +; KNL: ## BB#0: +; KNL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -209,7 +417,15 @@ declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone define <4 x float> @test_x86_sse_sqrt_ps(<4 x float> %a0) { - ; CHECK: sqrtps +; SSE-LABEL: test_x86_sse_sqrt_ps: +; SSE: ## BB#0: +; SSE-NEXT: sqrtps %xmm0, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_sqrt_ps: +; KNL: ## BB#0: +; KNL-NEXT: vsqrtps %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -217,7 +433,15 @@ declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone define <4 x float> @test_x86_sse_sqrt_ss(<4 x float> %a0) { - ; CHECK: sqrtss +; SSE-LABEL: test_x86_sse_sqrt_ss: +; SSE: ## BB#0: +; SSE-NEXT: sqrtss %xmm0, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_sqrt_ss: +; KNL: ## BB#0: +; KNL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -225,8 +449,17 @@ declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone define void @test_x86_sse_stmxcsr(i8* %a0) { - ; CHECK: movl - ; CHECK: stmxcsr +; SSE-LABEL: test_x86_sse_stmxcsr: +; SSE: ## BB#0: +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: stmxcsr (%eax) +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_stmxcsr: +; KNL: ## BB#0: +; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax +; KNL-NEXT: vstmxcsr (%eax) +; KNL-NEXT: retl call void @llvm.x86.sse.stmxcsr(i8* %a0) ret void } @@ -234,8 +467,17 @@ declare void @llvm.x86.sse.stmxcsr(i8*) nounwind define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) { - ; CHECK: movl - ; CHECK: movups +; SSE-LABEL: test_x86_sse_storeu_ps: +; SSE: ## BB#0: +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: movups %xmm0, (%eax) +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_storeu_ps: +; KNL: ## BB#0: +; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax +; KNL-NEXT: vmovups %xmm0, (%eax) +; KNL-NEXT: retl call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1) ret void } @@ -243,7 +485,15 @@ declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind define <4 x float> @test_x86_sse_sub_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: subss +; SSE-LABEL: test_x86_sse_sub_ss: +; SSE: ## BB#0: +; SSE-NEXT: subss %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_sub_ss: +; KNL: ## BB#0: +; KNL-NEXT: vsubss %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -251,9 +501,19 @@ declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>) nounwind read define i32 @test_x86_sse_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: ucomiss - ; CHECK: sete - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse_ucomieq_ss: +; SSE: ## BB#0: +; SSE-NEXT: ucomiss %xmm1, %xmm0 +; SSE-NEXT: sete %al +; SSE-NEXT: movzbl %al, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_ucomieq_ss: +; KNL: ## BB#0: +; KNL-NEXT: vucomiss %xmm1, %xmm0 +; KNL-NEXT: sete %al +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res } @@ -261,9 +521,19 @@ declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: ucomiss - ; CHECK: setae - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse_ucomige_ss: +; SSE: ## BB#0: +; SSE-NEXT: ucomiss %xmm1, %xmm0 +; SSE-NEXT: setae %al +; SSE-NEXT: movzbl %al, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_ucomige_ss: +; KNL: ## BB#0: +; KNL-NEXT: vucomiss %xmm1, %xmm0 +; KNL-NEXT: setae %al +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res } @@ -271,9 +541,19 @@ declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: ucomiss - ; CHECK: seta - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse_ucomigt_ss: +; SSE: ## BB#0: +; SSE-NEXT: ucomiss %xmm1, %xmm0 +; SSE-NEXT: seta %al +; SSE-NEXT: movzbl %al, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_ucomigt_ss: +; KNL: ## BB#0: +; KNL-NEXT: vucomiss %xmm1, %xmm0 +; KNL-NEXT: seta %al +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res } @@ -281,9 +561,19 @@ declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: ucomiss - ; CHECK: setbe - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse_ucomile_ss: +; SSE: ## BB#0: +; SSE-NEXT: ucomiss %xmm1, %xmm0 +; SSE-NEXT: setbe %al +; SSE-NEXT: movzbl %al, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_ucomile_ss: +; KNL: ## BB#0: +; KNL-NEXT: vucomiss %xmm1, %xmm0 +; KNL-NEXT: setbe %al +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res } @@ -291,8 +581,19 @@ declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: ucomiss - ; CHECK: sbbl +; SSE-LABEL: test_x86_sse_ucomilt_ss: +; SSE: ## BB#0: +; SSE-NEXT: ucomiss %xmm1, %xmm0 +; SSE-NEXT: sbbl %eax, %eax +; SSE-NEXT: andl $1, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_ucomilt_ss: +; KNL: ## BB#0: +; KNL-NEXT: vucomiss %xmm1, %xmm0 +; KNL-NEXT: sbbl %eax, %eax +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res } @@ -300,9 +601,19 @@ declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) { - ; CHECK: ucomiss - ; CHECK: setne - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse_ucomineq_ss: +; SSE: ## BB#0: +; SSE-NEXT: ucomiss %xmm1, %xmm0 +; SSE-NEXT: setne %al +; SSE-NEXT: movzbl %al, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse_ucomineq_ss: +; KNL: ## BB#0: +; KNL-NEXT: vucomiss %xmm1, %xmm0 +; KNL-NEXT: setne %al +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res } diff --git a/test/CodeGen/X86/sse2-intrinsics-x86.ll b/test/CodeGen/X86/sse2-intrinsics-x86.ll index 53132a167fb..bbabc66e496 100644 --- a/test/CodeGen/X86/sse2-intrinsics-x86.ll +++ b/test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -1,8 +1,17 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE -; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s +; NOTE: Assertions have been autogenerated by update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse2 | FileCheck %s --check-prefix=SSE +; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=KNL define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) { - ; CHECK: addsd +; SSE-LABEL: test_x86_sse2_add_sd: +; SSE: ## BB#0: +; SSE-NEXT: addsd %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_add_sd: +; KNL: ## BB#0: +; KNL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res } @@ -10,7 +19,15 @@ declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind define <2 x double> @test_x86_sse2_cmp_pd(<2 x double> %a0, <2 x double> %a1) { - ; CHECK: cmpordpd +; SSE-LABEL: test_x86_sse2_cmp_pd: +; SSE: ## BB#0: +; SSE-NEXT: cmpordpd %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_cmp_pd: +; KNL: ## BB#0: +; KNL-NEXT: vcmpordpd %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1] ret <2 x double> %res } @@ -18,7 +35,15 @@ declare <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double>, <2 x double>, i8) nounw define <2 x double> @test_x86_sse2_cmp_sd(<2 x double> %a0, <2 x double> %a1) { - ; CHECK: cmpordsd +; SSE-LABEL: test_x86_sse2_cmp_sd: +; SSE: ## BB#0: +; SSE-NEXT: cmpordsd %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_cmp_sd: +; KNL: ## BB#0: +; KNL-NEXT: vcmpordsd %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1] ret <2 x double> %res } @@ -26,9 +51,19 @@ declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounw define i32 @test_x86_sse2_comieq_sd(<2 x double> %a0, <2 x double> %a1) { - ; CHECK: comisd - ; CHECK: sete - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse2_comieq_sd: +; SSE: ## BB#0: +; SSE-NEXT: comisd %xmm1, %xmm0 +; SSE-NEXT: sete %al +; SSE-NEXT: movzbl %al, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_comieq_sd: +; KNL: ## BB#0: +; KNL-NEXT: vcomisd %xmm1, %xmm0 +; KNL-NEXT: sete %al +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res } @@ -36,9 +71,19 @@ declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readno define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) { - ; CHECK: comisd - ; CHECK: setae - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse2_comige_sd: +; SSE: ## BB#0: +; SSE-NEXT: comisd %xmm1, %xmm0 +; SSE-NEXT: setae %al +; SSE-NEXT: movzbl %al, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_comige_sd: +; KNL: ## BB#0: +; KNL-NEXT: vcomisd %xmm1, %xmm0 +; KNL-NEXT: setae %al +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res } @@ -46,9 +91,19 @@ declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readno define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) { - ; CHECK: comisd - ; CHECK: seta - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse2_comigt_sd: +; SSE: ## BB#0: +; SSE-NEXT: comisd %xmm1, %xmm0 +; SSE-NEXT: seta %al +; SSE-NEXT: movzbl %al, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_comigt_sd: +; KNL: ## BB#0: +; KNL-NEXT: vcomisd %xmm1, %xmm0 +; KNL-NEXT: seta %al +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res } @@ -56,9 +111,19 @@ declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readno define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) { - ; CHECK: comisd - ; CHECK: setbe - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse2_comile_sd: +; SSE: ## BB#0: +; SSE-NEXT: comisd %xmm1, %xmm0 +; SSE-NEXT: setbe %al +; SSE-NEXT: movzbl %al, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_comile_sd: +; KNL: ## BB#0: +; KNL-NEXT: vcomisd %xmm1, %xmm0 +; KNL-NEXT: setbe %al +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res } @@ -66,9 +131,19 @@ declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readno define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) { - ; CHECK: comisd - ; CHECK: sbbl %eax, %eax - ; CHECK: andl $1, %eax +; SSE-LABEL: test_x86_sse2_comilt_sd: +; SSE: ## BB#0: +; SSE-NEXT: comisd %xmm1, %xmm0 +; SSE-NEXT: sbbl %eax, %eax +; SSE-NEXT: andl $1, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_comilt_sd: +; KNL: ## BB#0: +; KNL-NEXT: vcomisd %xmm1, %xmm0 +; KNL-NEXT: sbbl %eax, %eax +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res } @@ -76,9 +151,19 @@ declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readno define i32 @test_x86_sse2_comineq_sd(<2 x double> %a0, <2 x double> %a1) { - ; CHECK: comisd - ; CHECK: setne - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse2_comineq_sd: +; SSE: ## BB#0: +; SSE-NEXT: comisd %xmm1, %xmm0 +; SSE-NEXT: setne %al +; SSE-NEXT: movzbl %al, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_comineq_sd: +; KNL: ## BB#0: +; KNL-NEXT: vcomisd %xmm1, %xmm0 +; KNL-NEXT: setne %al +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res } @@ -86,7 +171,15 @@ declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readn define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) { - ; CHECK: cvtdq2pd +; SSE-LABEL: test_x86_sse2_cvtdq2pd: +; SSE: ## BB#0: +; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_cvtdq2pd: +; KNL: ## BB#0: +; KNL-NEXT: vcvtdq2pd %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1] ret <2 x double> %res } @@ -94,7 +187,15 @@ declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) { - ; CHECK: cvtdq2ps +; SSE-LABEL: test_x86_sse2_cvtdq2ps: +; SSE: ## BB#0: +; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_cvtdq2ps: +; KNL: ## BB#0: +; KNL-NEXT: vcvtdq2ps %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -102,7 +203,15 @@ declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) { - ; CHECK: cvtpd2dq +; SSE-LABEL: test_x86_sse2_cvtpd2dq: +; SSE: ## BB#0: +; SSE-NEXT: cvtpd2dq %xmm0, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_cvtpd2dq: +; KNL: ## BB#0: +; KNL-NEXT: vcvtpd2dq %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res } @@ -110,7 +219,15 @@ declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone define <4 x float> @test_x86_sse2_cvtpd2ps(<2 x double> %a0) { - ; CHECK: cvtpd2ps +; SSE-LABEL: test_x86_sse2_cvtpd2ps: +; SSE: ## BB#0: +; SSE-NEXT: cvtpd2ps %xmm0, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_cvtpd2ps: +; KNL: ## BB#0: +; KNL-NEXT: vcvtpd2ps %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -118,7 +235,15 @@ declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) { - ; CHECK: cvtps2dq +; SSE-LABEL: test_x86_sse2_cvtps2dq: +; SSE: ## BB#0: +; SSE-NEXT: cvtps2dq %xmm0, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_cvtps2dq: +; KNL: ## BB#0: +; KNL-NEXT: vcvtps2dq %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res } @@ -126,7 +251,15 @@ declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) { - ; CHECK: cvtps2pd +; SSE-LABEL: test_x86_sse2_cvtps2pd: +; SSE: ## BB#0: +; SSE-NEXT: cvtps2pd %xmm0, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_cvtps2pd: +; KNL: ## BB#0: +; KNL-NEXT: vcvtps2pd %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1] ret <2 x double> %res } @@ -134,7 +267,15 @@ declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone define i32 @test_x86_sse2_cvtsd2si(<2 x double> %a0) { - ; CHECK: cvtsd2si +; SSE-LABEL: test_x86_sse2_cvtsd2si: +; SSE: ## BB#0: +; SSE-NEXT: cvtsd2si %xmm0, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_cvtsd2si: +; KNL: ## BB#0: +; KNL-NEXT: vcvtsd2si %xmm0, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ; [#uses=1] ret i32 %res } @@ -142,8 +283,15 @@ declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone define <4 x float> @test_x86_sse2_cvtsd2ss(<4 x float> %a0, <2 x double> %a1) { - ; CHECK: cvtsd2ss - ; SSE-NOT: cvtsd2ss %xmm{{[0-9]+}}, %xmm{{[0-9]+}}, %xmm{{[0-9]+}} +; SSE-LABEL: test_x86_sse2_cvtsd2ss: +; SSE: ## BB#0: +; SSE-NEXT: cvtsd2ss %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_cvtsd2ss: +; KNL: ## BB#0: +; KNL-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -151,8 +299,17 @@ declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0) { - ; CHECK: movl - ; CHECK: cvtsi2sd +; SSE-LABEL: test_x86_sse2_cvtsi2sd: +; SSE: ## BB#0: +; SSE-NEXT: movl $7, %eax +; SSE-NEXT: cvtsi2sdl %eax, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_cvtsi2sd: +; KNL: ## BB#0: +; KNL-NEXT: movl $7, %eax +; KNL-NEXT: vcvtsi2sdl %eax, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1] ret <2 x double> %res } @@ -160,7 +317,15 @@ declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnon define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) { - ; CHECK: cvtss2sd +; SSE-LABEL: test_x86_sse2_cvtss2sd: +; SSE: ## BB#0: +; SSE-NEXT: cvtss2sd %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_cvtss2sd: +; KNL: ## BB#0: +; KNL-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res } @@ -168,7 +333,15 @@ declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind define <4 x i32> @test_x86_sse2_cvttpd2dq(<2 x double> %a0) { - ; CHECK: cvttpd2dq +; SSE-LABEL: test_x86_sse2_cvttpd2dq: +; SSE: ## BB#0: +; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_cvttpd2dq: +; KNL: ## BB#0: +; KNL-NEXT: vcvttpd2dq %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res } @@ -176,7 +349,15 @@ declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone define <4 x i32> @test_x86_sse2_cvttps2dq(<4 x float> %a0) { - ; CHECK: cvttps2dq +; SSE-LABEL: test_x86_sse2_cvttps2dq: +; SSE: ## BB#0: +; SSE-NEXT: cvttps2dq %xmm0, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_cvttps2dq: +; KNL: ## BB#0: +; KNL-NEXT: vcvttps2dq %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res } @@ -184,7 +365,15 @@ declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone define i32 @test_x86_sse2_cvttsd2si(<2 x double> %a0) { - ; CHECK: cvttsd2si +; SSE-LABEL: test_x86_sse2_cvttsd2si: +; SSE: ## BB#0: +; SSE-NEXT: cvttsd2si %xmm0, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_cvttsd2si: +; KNL: ## BB#0: +; KNL-NEXT: vcvttsd2si %xmm0, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ; [#uses=1] ret i32 %res } @@ -192,7 +381,15 @@ declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) { - ; CHECK: divsd +; SSE-LABEL: test_x86_sse2_div_sd: +; SSE: ## BB#0: +; SSE-NEXT: divsd %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_div_sd: +; KNL: ## BB#0: +; KNL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res } @@ -201,7 +398,15 @@ declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind define <2 x double> @test_x86_sse2_max_pd(<2 x double> %a0, <2 x double> %a1) { - ; CHECK: maxpd +; SSE-LABEL: test_x86_sse2_max_pd: +; SSE: ## BB#0: +; SSE-NEXT: maxpd %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_max_pd: +; KNL: ## BB#0: +; KNL-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res } @@ -209,7 +414,15 @@ declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind define <2 x double> @test_x86_sse2_max_sd(<2 x double> %a0, <2 x double> %a1) { - ; CHECK: maxsd +; SSE-LABEL: test_x86_sse2_max_sd: +; SSE: ## BB#0: +; SSE-NEXT: maxsd %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_max_sd: +; KNL: ## BB#0: +; KNL-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res } @@ -217,7 +430,15 @@ declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind define <2 x double> @test_x86_sse2_min_pd(<2 x double> %a0, <2 x double> %a1) { - ; CHECK: minpd +; SSE-LABEL: test_x86_sse2_min_pd: +; SSE: ## BB#0: +; SSE-NEXT: minpd %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_min_pd: +; KNL: ## BB#0: +; KNL-NEXT: vminpd %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res } @@ -225,7 +446,15 @@ declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind define <2 x double> @test_x86_sse2_min_sd(<2 x double> %a0, <2 x double> %a1) { - ; CHECK: minsd +; SSE-LABEL: test_x86_sse2_min_sd: +; SSE: ## BB#0: +; SSE-NEXT: minsd %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_min_sd: +; KNL: ## BB#0: +; KNL-NEXT: vminsd %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res } @@ -233,7 +462,15 @@ declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind define i32 @test_x86_sse2_movmsk_pd(<2 x double> %a0) { - ; CHECK: movmskpd +; SSE-LABEL: test_x86_sse2_movmsk_pd: +; SSE: ## BB#0: +; SSE-NEXT: movmskpd %xmm0, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_movmsk_pd: +; KNL: ## BB#0: +; KNL-NEXT: vmovmskpd %xmm0, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ; [#uses=1] ret i32 %res } @@ -243,8 +480,15 @@ declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) { - ; CHECK: test_x86_sse2_mul_sd - ; CHECK: mulsd +; SSE-LABEL: test_x86_sse2_mul_sd: +; SSE: ## BB#0: +; SSE-NEXT: mulsd %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_mul_sd: +; KNL: ## BB#0: +; KNL-NEXT: vmulsd %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res } @@ -252,7 +496,15 @@ declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind define <8 x i16> @test_x86_sse2_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) { - ; CHECK: packssdw +; SSE-LABEL: test_x86_sse2_packssdw_128: +; SSE: ## BB#0: +; SSE-NEXT: packssdw %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_packssdw_128: +; KNL: ## BB#0: +; KNL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res } @@ -260,7 +512,15 @@ declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind rea define <16 x i8> @test_x86_sse2_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) { - ; CHECK: packsswb +; SSE-LABEL: test_x86_sse2_packsswb_128: +; SSE: ## BB#0: +; SSE-NEXT: packsswb %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_packsswb_128: +; KNL: ## BB#0: +; KNL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res } @@ -268,7 +528,15 @@ declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind rea define <16 x i8> @test_x86_sse2_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) { - ; CHECK: packuswb +; SSE-LABEL: test_x86_sse2_packuswb_128: +; SSE: ## BB#0: +; SSE-NEXT: packuswb %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_packuswb_128: +; KNL: ## BB#0: +; KNL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res } @@ -276,7 +544,15 @@ declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind rea define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) { - ; CHECK: paddsb +; SSE-LABEL: test_x86_sse2_padds_b: +; SSE: ## BB#0: +; SSE-NEXT: paddsb %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_padds_b: +; KNL: ## BB#0: +; KNL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res } @@ -284,7 +560,15 @@ declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) { - ; CHECK: paddsw +; SSE-LABEL: test_x86_sse2_padds_w: +; SSE: ## BB#0: +; SSE-NEXT: paddsw %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_padds_w: +; KNL: ## BB#0: +; KNL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res } @@ -292,7 +576,15 @@ declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) { - ; CHECK: paddusb +; SSE-LABEL: test_x86_sse2_paddus_b: +; SSE: ## BB#0: +; SSE-NEXT: paddusb %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_paddus_b: +; KNL: ## BB#0: +; KNL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res } @@ -300,7 +592,15 @@ declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnon define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) { - ; CHECK: paddusw +; SSE-LABEL: test_x86_sse2_paddus_w: +; SSE: ## BB#0: +; SSE-NEXT: paddusw %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_paddus_w: +; KNL: ## BB#0: +; KNL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res } @@ -308,7 +608,15 @@ declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnon define <16 x i8> @test_x86_sse2_pavg_b(<16 x i8> %a0, <16 x i8> %a1) { - ; CHECK: pavgb +; SSE-LABEL: test_x86_sse2_pavg_b: +; SSE: ## BB#0: +; SSE-NEXT: pavgb %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_pavg_b: +; KNL: ## BB#0: +; KNL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res } @@ -316,7 +624,15 @@ declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone define <8 x i16> @test_x86_sse2_pavg_w(<8 x i16> %a0, <8 x i16> %a1) { - ; CHECK: pavgw +; SSE-LABEL: test_x86_sse2_pavg_w: +; SSE: ## BB#0: +; SSE-NEXT: pavgw %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_pavg_w: +; KNL: ## BB#0: +; KNL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res } @@ -324,7 +640,15 @@ declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) { - ; CHECK: pmaddwd +; SSE-LABEL: test_x86_sse2_pmadd_wd: +; SSE: ## BB#0: +; SSE-NEXT: pmaddwd %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_pmadd_wd: +; KNL: ## BB#0: +; KNL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res } @@ -332,7 +656,15 @@ declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnon define <8 x i16> @test_x86_sse2_pmaxs_w(<8 x i16> %a0, <8 x i16> %a1) { - ; CHECK: pmaxsw +; SSE-LABEL: test_x86_sse2_pmaxs_w: +; SSE: ## BB#0: +; SSE-NEXT: pmaxsw %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_pmaxs_w: +; KNL: ## BB#0: +; KNL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res } @@ -340,7 +672,15 @@ declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone define <16 x i8> @test_x86_sse2_pmaxu_b(<16 x i8> %a0, <16 x i8> %a1) { - ; CHECK: pmaxub +; SSE-LABEL: test_x86_sse2_pmaxu_b: +; SSE: ## BB#0: +; SSE-NEXT: pmaxub %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_pmaxu_b: +; KNL: ## BB#0: +; KNL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res } @@ -348,7 +688,15 @@ declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone define <8 x i16> @test_x86_sse2_pmins_w(<8 x i16> %a0, <8 x i16> %a1) { - ; CHECK: pminsw +; SSE-LABEL: test_x86_sse2_pmins_w: +; SSE: ## BB#0: +; SSE-NEXT: pminsw %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_pmins_w: +; KNL: ## BB#0: +; KNL-NEXT: vpminsw %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res } @@ -356,7 +704,15 @@ declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone define <16 x i8> @test_x86_sse2_pminu_b(<16 x i8> %a0, <16 x i8> %a1) { - ; CHECK: pminub +; SSE-LABEL: test_x86_sse2_pminu_b: +; SSE: ## BB#0: +; SSE-NEXT: pminub %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_pminu_b: +; KNL: ## BB#0: +; KNL-NEXT: vpminub %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res } @@ -364,7 +720,15 @@ declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone define i32 @test_x86_sse2_pmovmskb_128(<16 x i8> %a0) { - ; CHECK: pmovmskb +; SSE-LABEL: test_x86_sse2_pmovmskb_128: +; SSE: ## BB#0: +; SSE-NEXT: pmovmskb %xmm0, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_pmovmskb_128: +; KNL: ## BB#0: +; KNL-NEXT: vpmovmskb %xmm0, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0) ; [#uses=1] ret i32 %res } @@ -372,7 +736,15 @@ declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone define <8 x i16> @test_x86_sse2_pmulh_w(<8 x i16> %a0, <8 x i16> %a1) { - ; CHECK: pmulhw +; SSE-LABEL: test_x86_sse2_pmulh_w: +; SSE: ## BB#0: +; SSE-NEXT: pmulhw %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_pmulh_w: +; KNL: ## BB#0: +; KNL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res } @@ -380,7 +752,15 @@ declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone define <8 x i16> @test_x86_sse2_pmulhu_w(<8 x i16> %a0, <8 x i16> %a1) { - ; CHECK: pmulhuw +; SSE-LABEL: test_x86_sse2_pmulhu_w: +; SSE: ## BB#0: +; SSE-NEXT: pmulhuw %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_pmulhu_w: +; KNL: ## BB#0: +; KNL-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res } @@ -388,7 +768,15 @@ declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnon define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) { - ; CHECK: pmuludq +; SSE-LABEL: test_x86_sse2_pmulu_dq: +; SSE: ## BB#0: +; SSE-NEXT: pmuludq %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_pmulu_dq: +; KNL: ## BB#0: +; KNL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1] ret <2 x i64> %res } @@ -396,7 +784,15 @@ declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnon define <2 x i64> @test_x86_sse2_psad_bw(<16 x i8> %a0, <16 x i8> %a1) { - ; CHECK: psadbw +; SSE-LABEL: test_x86_sse2_psad_bw: +; SSE: ## BB#0: +; SSE-NEXT: psadbw %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_psad_bw: +; KNL: ## BB#0: +; KNL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) ; <<2 x i64>> [#uses=1] ret <2 x i64> %res } @@ -404,7 +800,15 @@ declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone define <4 x i32> @test_x86_sse2_psll_d(<4 x i32> %a0, <4 x i32> %a1) { - ; CHECK: pslld +; SSE-LABEL: test_x86_sse2_psll_d: +; SSE: ## BB#0: +; SSE-NEXT: pslld %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_psll_d: +; KNL: ## BB#0: +; KNL-NEXT: vpslld %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res } @@ -412,7 +816,15 @@ declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone define <2 x i64> @test_x86_sse2_psll_q(<2 x i64> %a0, <2 x i64> %a1) { - ; CHECK: psllq +; SSE-LABEL: test_x86_sse2_psll_q: +; SSE: ## BB#0: +; SSE-NEXT: psllq %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_psll_q: +; KNL: ## BB#0: +; KNL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] ret <2 x i64> %res } @@ -420,7 +832,15 @@ declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone define <8 x i16> @test_x86_sse2_psll_w(<8 x i16> %a0, <8 x i16> %a1) { - ; CHECK: psllw +; SSE-LABEL: test_x86_sse2_psll_w: +; SSE: ## BB#0: +; SSE-NEXT: psllw %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_psll_w: +; KNL: ## BB#0: +; KNL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res } @@ -428,7 +848,15 @@ declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone define <4 x i32> @test_x86_sse2_pslli_d(<4 x i32> %a0) { - ; CHECK: pslld +; SSE-LABEL: test_x86_sse2_pslli_d: +; SSE: ## BB#0: +; SSE-NEXT: pslld $7, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_pslli_d: +; KNL: ## BB#0: +; KNL-NEXT: vpslld $7, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res } @@ -436,7 +864,15 @@ declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone define <2 x i64> @test_x86_sse2_pslli_q(<2 x i64> %a0) { - ; CHECK: psllq +; SSE-LABEL: test_x86_sse2_pslli_q: +; SSE: ## BB#0: +; SSE-NEXT: psllq $7, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_pslli_q: +; KNL: ## BB#0: +; KNL-NEXT: vpsllq $7, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] ret <2 x i64> %res } @@ -444,7 +880,15 @@ declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone define <8 x i16> @test_x86_sse2_pslli_w(<8 x i16> %a0) { - ; CHECK: psllw +; SSE-LABEL: test_x86_sse2_pslli_w: +; SSE: ## BB#0: +; SSE-NEXT: psllw $7, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_pslli_w: +; KNL: ## BB#0: +; KNL-NEXT: vpsllw $7, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res } @@ -452,7 +896,15 @@ declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone define <4 x i32> @test_x86_sse2_psra_d(<4 x i32> %a0, <4 x i32> %a1) { - ; CHECK: psrad +; SSE-LABEL: test_x86_sse2_psra_d: +; SSE: ## BB#0: +; SSE-NEXT: psrad %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_psra_d: +; KNL: ## BB#0: +; KNL-NEXT: vpsrad %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res } @@ -460,7 +912,15 @@ declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone define <8 x i16> @test_x86_sse2_psra_w(<8 x i16> %a0, <8 x i16> %a1) { - ; CHECK: psraw +; SSE-LABEL: test_x86_sse2_psra_w: +; SSE: ## BB#0: +; SSE-NEXT: psraw %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_psra_w: +; KNL: ## BB#0: +; KNL-NEXT: vpsraw %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res } @@ -468,7 +928,15 @@ declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone define <4 x i32> @test_x86_sse2_psrai_d(<4 x i32> %a0) { - ; CHECK: psrad +; SSE-LABEL: test_x86_sse2_psrai_d: +; SSE: ## BB#0: +; SSE-NEXT: psrad $7, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_psrai_d: +; KNL: ## BB#0: +; KNL-NEXT: vpsrad $7, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res } @@ -476,7 +944,15 @@ declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone define <8 x i16> @test_x86_sse2_psrai_w(<8 x i16> %a0) { - ; CHECK: psraw +; SSE-LABEL: test_x86_sse2_psrai_w: +; SSE: ## BB#0: +; SSE-NEXT: psraw $7, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_psrai_w: +; KNL: ## BB#0: +; KNL-NEXT: vpsraw $7, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res } @@ -484,7 +960,15 @@ declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone define <4 x i32> @test_x86_sse2_psrl_d(<4 x i32> %a0, <4 x i32> %a1) { - ; CHECK: psrld +; SSE-LABEL: test_x86_sse2_psrl_d: +; SSE: ## BB#0: +; SSE-NEXT: psrld %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_psrl_d: +; KNL: ## BB#0: +; KNL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res } @@ -492,7 +976,15 @@ declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone define <2 x i64> @test_x86_sse2_psrl_q(<2 x i64> %a0, <2 x i64> %a1) { - ; CHECK: psrlq +; SSE-LABEL: test_x86_sse2_psrl_q: +; SSE: ## BB#0: +; SSE-NEXT: psrlq %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_psrl_q: +; KNL: ## BB#0: +; KNL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] ret <2 x i64> %res } @@ -500,7 +992,15 @@ declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone define <8 x i16> @test_x86_sse2_psrl_w(<8 x i16> %a0, <8 x i16> %a1) { - ; CHECK: psrlw +; SSE-LABEL: test_x86_sse2_psrl_w: +; SSE: ## BB#0: +; SSE-NEXT: psrlw %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_psrl_w: +; KNL: ## BB#0: +; KNL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res } @@ -508,7 +1008,15 @@ declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone define <4 x i32> @test_x86_sse2_psrli_d(<4 x i32> %a0) { - ; CHECK: psrld +; SSE-LABEL: test_x86_sse2_psrli_d: +; SSE: ## BB#0: +; SSE-NEXT: psrld $7, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_psrli_d: +; KNL: ## BB#0: +; KNL-NEXT: vpsrld $7, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %a0, i32 7) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res } @@ -516,7 +1024,15 @@ declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone define <2 x i64> @test_x86_sse2_psrli_q(<2 x i64> %a0) { - ; CHECK: psrlq +; SSE-LABEL: test_x86_sse2_psrli_q: +; SSE: ## BB#0: +; SSE-NEXT: psrlq $7, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_psrli_q: +; KNL: ## BB#0: +; KNL-NEXT: vpsrlq $7, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1] ret <2 x i64> %res } @@ -524,7 +1040,15 @@ declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) { - ; CHECK: psrlw +; SSE-LABEL: test_x86_sse2_psrli_w: +; SSE: ## BB#0: +; SSE-NEXT: psrlw $7, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_psrli_w: +; KNL: ## BB#0: +; KNL-NEXT: vpsrlw $7, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %a0, i32 7) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res } @@ -532,7 +1056,15 @@ declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) { - ; CHECK: psubsb +; SSE-LABEL: test_x86_sse2_psubs_b: +; SSE: ## BB#0: +; SSE-NEXT: psubsb %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_psubs_b: +; KNL: ## BB#0: +; KNL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res } @@ -540,7 +1072,15 @@ declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) { - ; CHECK: psubsw +; SSE-LABEL: test_x86_sse2_psubs_w: +; SSE: ## BB#0: +; SSE-NEXT: psubsw %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_psubs_w: +; KNL: ## BB#0: +; KNL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res } @@ -548,7 +1088,15 @@ declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) { - ; CHECK: psubusb +; SSE-LABEL: test_x86_sse2_psubus_b: +; SSE: ## BB#0: +; SSE-NEXT: psubusb %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_psubus_b: +; KNL: ## BB#0: +; KNL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res } @@ -556,7 +1104,15 @@ declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnon define <8 x i16> @test_x86_sse2_psubus_w(<8 x i16> %a0, <8 x i16> %a1) { - ; CHECK: psubusw +; SSE-LABEL: test_x86_sse2_psubus_w: +; SSE: ## BB#0: +; SSE-NEXT: psubusw %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_psubus_w: +; KNL: ## BB#0: +; KNL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res } @@ -564,7 +1120,15 @@ declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnon define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) { - ; CHECK: sqrtpd +; SSE-LABEL: test_x86_sse2_sqrt_pd: +; SSE: ## BB#0: +; SSE-NEXT: sqrtpd %xmm0, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_sqrt_pd: +; KNL: ## BB#0: +; KNL-NEXT: vsqrtpd %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1] ret <2 x double> %res } @@ -572,7 +1136,15 @@ declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) { - ; CHECK: sqrtsd +; SSE-LABEL: test_x86_sse2_sqrt_sd: +; SSE: ## BB#0: +; SSE-NEXT: sqrtsd %xmm0, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_sqrt_sd: +; KNL: ## BB#0: +; KNL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1] ret <2 x double> %res } @@ -580,9 +1152,17 @@ declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) { - ; CHECK: test_x86_sse2_storel_dq - ; CHECK: movl - ; CHECK: movlps +; SSE-LABEL: test_x86_sse2_storel_dq: +; SSE: ## BB#0: +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: movlps %xmm0, (%eax) +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_storel_dq: +; KNL: ## BB#0: +; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax +; KNL-NEXT: vmovlps %xmm0, (%eax) +; KNL-NEXT: retl call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1) ret void } @@ -590,10 +1170,20 @@ declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) { - ; CHECK: test_x86_sse2_storeu_dq - ; CHECK: movl - ; CHECK: movdqu ; add operation forces the execution domain. +; SSE-LABEL: test_x86_sse2_storeu_dq: +; SSE: ## BB#0: +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: paddb LCPI71_0, %xmm0 +; SSE-NEXT: movdqu %xmm0, (%eax) +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_storeu_dq: +; KNL: ## BB#0: +; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax +; KNL-NEXT: vpaddb LCPI71_0, %xmm0, %xmm0 +; KNL-NEXT: vmovdqu %xmm0, (%eax) +; KNL-NEXT: retl %a2 = add <16 x i8> %a1, call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2) ret void @@ -602,10 +1192,24 @@ declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) { - ; CHECK: test_x86_sse2_storeu_pd - ; CHECK: movl - ; CHECK: movupd ; fadd operation forces the execution domain. +; SSE-LABEL: test_x86_sse2_storeu_pd: +; SSE: ## BB#0: +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; SSE-NEXT: pslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7] +; SSE-NEXT: addpd %xmm0, %xmm1 +; SSE-NEXT: movupd %xmm1, (%eax) +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_storeu_pd: +; KNL: ## BB#0: +; KNL-NEXT: movl {{[0-9]+}}(%esp), %eax +; KNL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; KNL-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7] +; KNL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 +; KNL-NEXT: vmovupd %xmm0, (%eax) +; KNL-NEXT: retl %a2 = fadd <2 x double> %a1, call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2) ret void @@ -614,8 +1218,15 @@ declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) { - ; CHECK: test_x86_sse2_sub_sd - ; CHECK: subsd +; SSE-LABEL: test_x86_sse2_sub_sd: +; SSE: ## BB#0: +; SSE-NEXT: subsd %xmm1, %xmm0 +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_sub_sd: +; KNL: ## BB#0: +; KNL-NEXT: vsubsd %xmm1, %xmm0, %xmm0 +; KNL-NEXT: retl %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res } @@ -623,9 +1234,19 @@ declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind define i32 @test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) { - ; CHECK: ucomisd - ; CHECK: sete - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse2_ucomieq_sd: +; SSE: ## BB#0: +; SSE-NEXT: ucomisd %xmm1, %xmm0 +; SSE-NEXT: sete %al +; SSE-NEXT: movzbl %al, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_ucomieq_sd: +; KNL: ## BB#0: +; KNL-NEXT: vucomisd %xmm1, %xmm0 +; KNL-NEXT: sete %al +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res } @@ -633,9 +1254,19 @@ declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readn define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) { - ; CHECK: ucomisd - ; CHECK: setae - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse2_ucomige_sd: +; SSE: ## BB#0: +; SSE-NEXT: ucomisd %xmm1, %xmm0 +; SSE-NEXT: setae %al +; SSE-NEXT: movzbl %al, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_ucomige_sd: +; KNL: ## BB#0: +; KNL-NEXT: vucomisd %xmm1, %xmm0 +; KNL-NEXT: setae %al +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res } @@ -643,9 +1274,19 @@ declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readn define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) { - ; CHECK: ucomisd - ; CHECK: seta - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse2_ucomigt_sd: +; SSE: ## BB#0: +; SSE-NEXT: ucomisd %xmm1, %xmm0 +; SSE-NEXT: seta %al +; SSE-NEXT: movzbl %al, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_ucomigt_sd: +; KNL: ## BB#0: +; KNL-NEXT: vucomisd %xmm1, %xmm0 +; KNL-NEXT: seta %al +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res } @@ -653,9 +1294,19 @@ declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readn define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) { - ; CHECK: ucomisd - ; CHECK: setbe - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse2_ucomile_sd: +; SSE: ## BB#0: +; SSE-NEXT: ucomisd %xmm1, %xmm0 +; SSE-NEXT: setbe %al +; SSE-NEXT: movzbl %al, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_ucomile_sd: +; KNL: ## BB#0: +; KNL-NEXT: vucomisd %xmm1, %xmm0 +; KNL-NEXT: setbe %al +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res } @@ -663,8 +1314,19 @@ declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readn define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) { - ; CHECK: ucomisd - ; CHECK: sbbl +; SSE-LABEL: test_x86_sse2_ucomilt_sd: +; SSE: ## BB#0: +; SSE-NEXT: ucomisd %xmm1, %xmm0 +; SSE-NEXT: sbbl %eax, %eax +; SSE-NEXT: andl $1, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_ucomilt_sd: +; KNL: ## BB#0: +; KNL-NEXT: vucomisd %xmm1, %xmm0 +; KNL-NEXT: sbbl %eax, %eax +; KNL-NEXT: andl $1, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res } @@ -672,24 +1334,49 @@ declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readn define i32 @test_x86_sse2_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) { - ; CHECK: ucomisd - ; CHECK: setne - ; CHECK: movzbl +; SSE-LABEL: test_x86_sse2_ucomineq_sd: +; SSE: ## BB#0: +; SSE-NEXT: ucomisd %xmm1, %xmm0 +; SSE-NEXT: setne %al +; SSE-NEXT: movzbl %al, %eax +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_ucomineq_sd: +; KNL: ## BB#0: +; KNL-NEXT: vucomisd %xmm1, %xmm0 +; KNL-NEXT: setne %al +; KNL-NEXT: movzbl %al, %eax +; KNL-NEXT: retl %res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res } declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind readnone define void @test_x86_sse2_pause() { - ; CHECK: pause +; SSE-LABEL: test_x86_sse2_pause: +; SSE: ## BB#0: +; SSE-NEXT: pause +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_pause: +; KNL: ## BB#0: +; KNL-NEXT: pause +; KNL-NEXT: retl tail call void @llvm.x86.sse2.pause() - ret void + ret void } declare void @llvm.x86.sse2.pause() nounwind define <4 x i32> @test_x86_sse2_pshuf_d(<4 x i32> %a) { -; CHECK-LABEL: test_x86_sse2_pshuf_d: -; CHECK: pshufd $27 +; SSE-LABEL: test_x86_sse2_pshuf_d: +; SSE: ## BB#0: ## %entry +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,2,1,0] +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_pshuf_d: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0] +; KNL-NEXT: retl entry: %res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone ret <4 x i32> %res @@ -697,8 +1384,15 @@ entry: declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8) nounwind readnone define <8 x i16> @test_x86_sse2_pshufl_w(<8 x i16> %a) { -; CHECK-LABEL: test_x86_sse2_pshufl_w: -; CHECK: pshuflw $27 +; SSE-LABEL: test_x86_sse2_pshufl_w: +; SSE: ## BB#0: ## %entry +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_pshufl_w: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7] +; KNL-NEXT: retl entry: %res = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone ret <8 x i16> %res @@ -706,8 +1400,15 @@ entry: declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8) nounwind readnone define <8 x i16> @test_x86_sse2_pshufh_w(<8 x i16> %a) { -; CHECK-LABEL: test_x86_sse2_pshufh_w: -; CHECK: pshufhw $27 +; SSE-LABEL: test_x86_sse2_pshufh_w: +; SSE: ## BB#0: ## %entry +; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4] +; SSE-NEXT: retl +; +; KNL-LABEL: test_x86_sse2_pshufh_w: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4] +; KNL-NEXT: retl entry: %res = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone ret <8 x i16> %res