From bf59b4ffe0716c3b826785e7167545a56ee79b24 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 1 Aug 2017 15:14:35 +0000 Subject: [PATCH] [X86] Added missing cpu to fix generic scheduling model tests git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@309691 91177308-0d34-0410-b5e6-96231b3b80d8 --- test/CodeGen/X86/avx-schedule.ll | 566 +++++++++++++++++ test/CodeGen/X86/avx2-schedule.ll | 110 ++++ test/CodeGen/X86/bmi-schedule.ll | 72 +-- test/CodeGen/X86/bmi2-schedule.ll | 26 +- test/CodeGen/X86/f16c-schedule.ll | 28 + test/CodeGen/X86/lzcnt-schedule.ll | 14 +- test/CodeGen/X86/popcnt-schedule.ll | 26 +- test/CodeGen/X86/sse-schedule.ll | 360 +++++------ test/CodeGen/X86/sse2-schedule.ll | 916 ++++++++++++++-------------- test/CodeGen/X86/sse3-schedule.ll | 66 +- test/CodeGen/X86/sse41-schedule.ll | 354 +++++------ test/CodeGen/X86/sse42-schedule.ll | 96 +-- test/CodeGen/X86/sse4a-schedule.ll | 18 +- test/CodeGen/X86/ssse3-schedule.ll | 100 +-- 14 files changed, 1728 insertions(+), 1024 deletions(-) diff --git a/test/CodeGen/X86/avx-schedule.ll b/test/CodeGen/X86/avx-schedule.ll index 88b810262c6..c3b40a435ad 100644 --- a/test/CodeGen/X86/avx-schedule.ll +++ b/test/CodeGen/X86/avx-schedule.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL @@ -7,6 +8,12 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 define <4 x double> @test_addpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { +; GENERIC-LABEL: test_addpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_addpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] @@ -37,6 +44,12 @@ define <4 x double> @test_addpd(<4 x double> %a0, <4 x double> %a1, <4 x double> } define <8 x float> @test_addps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { +; GENERIC-LABEL: test_addps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_addps: ; SANDY: # BB#0: ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] @@ -67,6 +80,12 @@ define <8 x float> @test_addps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a } define <4 x double> @test_addsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { +; GENERIC-LABEL: test_addsubpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_addsubpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] @@ -98,6 +117,12 @@ define <4 x double> @test_addsubpd(<4 x double> %a0, <4 x double> %a1, <4 x doub declare <4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double>, <4 x double>) nounwind readnone define <8 x float> @test_addsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { +; GENERIC-LABEL: test_addsubps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_addsubps: ; SANDY: # BB#0: ; SANDY-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] @@ -129,6 +154,13 @@ define <8 x float> @test_addsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwind readnone define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { +; GENERIC-LABEL: test_andnotpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_andnotpd: ; SANDY: # BB#0: ; SANDY-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] @@ -170,6 +202,13 @@ define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x doub } define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { +; GENERIC-LABEL: test_andnotps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_andnotps: ; SANDY: # BB#0: ; SANDY-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] @@ -211,6 +250,13 @@ define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float> } define <4 x double> @test_andpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { +; GENERIC-LABEL: test_andpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_andpd: ; SANDY: # BB#0: ; SANDY-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] @@ -250,6 +296,13 @@ define <4 x double> @test_andpd(<4 x double> %a0, <4 x double> %a1, <4 x double> } define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { +; GENERIC-LABEL: test_andps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_andps: ; SANDY: # BB#0: ; SANDY-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] @@ -289,6 +342,13 @@ define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a } define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { +; GENERIC-LABEL: test_blendpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:1.00] +; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_blendpd: ; SANDY: # BB#0: ; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:1.00] @@ -324,6 +384,12 @@ define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x doubl } define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { +; GENERIC-LABEL: test_blendps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:1.00] +; GENERIC-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [8:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_blendps: ; SANDY: # BB#0: ; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:1.00] @@ -354,6 +420,12 @@ define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> * } define <4 x double> @test_blendvpd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) { +; GENERIC-LABEL: test_blendvpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] +; GENERIC-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_blendvpd: ; SANDY: # BB#0: ; SANDY-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] @@ -385,6 +457,12 @@ define <4 x double> @test_blendvpd(<4 x double> %a0, <4 x double> %a1, <4 x doub declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>) nounwind readnone define <8 x float> @test_blendvps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) { +; GENERIC-LABEL: test_blendvps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] +; GENERIC-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_blendvps: ; SANDY: # BB#0: ; SANDY-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] @@ -416,6 +494,11 @@ define <8 x float> @test_blendvps(<8 x float> %a0, <8 x float> %a1, <8 x float> declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>) nounwind readnone define <8 x float> @test_broadcastf128(<4 x float> *%a0) { +; GENERIC-LABEL: test_broadcastf128: +; GENERIC: # BB#0: +; GENERIC-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_broadcastf128: ; SANDY: # BB#0: ; SANDY-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [3:1.00] @@ -441,6 +524,11 @@ define <8 x float> @test_broadcastf128(<4 x float> *%a0) { } define <4 x double> @test_broadcastsd_ymm(double *%a0) { +; GENERIC-LABEL: test_broadcastsd_ymm: +; GENERIC: # BB#0: +; GENERIC-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_broadcastsd_ymm: ; SANDY: # BB#0: ; SANDY-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50] @@ -467,6 +555,11 @@ define <4 x double> @test_broadcastsd_ymm(double *%a0) { } define <4 x float> @test_broadcastss(float *%a0) { +; GENERIC-LABEL: test_broadcastss: +; GENERIC: # BB#0: +; GENERIC-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_broadcastss: ; SANDY: # BB#0: ; SANDY-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50] @@ -493,6 +586,11 @@ define <4 x float> @test_broadcastss(float *%a0) { } define <8 x float> @test_broadcastss_ymm(float *%a0) { +; GENERIC-LABEL: test_broadcastss_ymm: +; GENERIC: # BB#0: +; GENERIC-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_broadcastss_ymm: ; SANDY: # BB#0: ; SANDY-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [5:1.00] @@ -519,6 +617,13 @@ define <8 x float> @test_broadcastss_ymm(float *%a0) { } define <4 x double> @test_cmppd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { +; GENERIC-LABEL: test_cmppd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] +; GENERIC-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; GENERIC-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cmppd: ; SANDY: # BB#0: ; SANDY-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] @@ -557,6 +662,13 @@ define <4 x double> @test_cmppd(<4 x double> %a0, <4 x double> %a1, <4 x double> } define <8 x float> @test_cmpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { +; GENERIC-LABEL: test_cmpps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] +; GENERIC-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; GENERIC-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cmpps: ; SANDY: # BB#0: ; SANDY-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] @@ -595,6 +707,13 @@ define <8 x float> @test_cmpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a } define <4 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { +; GENERIC-LABEL: test_cvtdq2pd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00] +; GENERIC-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [10:1.00] +; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtdq2pd: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00] @@ -630,6 +749,13 @@ define <4 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { } define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) { +; GENERIC-LABEL: test_cvtdq2ps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [10:1.00] +; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtdq2ps: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] @@ -667,6 +793,13 @@ define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) { } define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) { +; GENERIC-LABEL: test_cvtpd2dq: +; GENERIC: # BB#0: +; GENERIC-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00] +; GENERIC-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:1.00] +; GENERIC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtpd2dq: ; SANDY: # BB#0: ; SANDY-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00] @@ -702,6 +835,13 @@ define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) { } define <8 x float> @test_cvtpd2ps(<4 x double> %a0, <4 x double> *%a1) { +; GENERIC-LABEL: test_cvtpd2ps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00] +; GENERIC-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00] +; GENERIC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtpd2ps: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00] @@ -737,6 +877,13 @@ define <8 x float> @test_cvtpd2ps(<4 x double> %a0, <4 x double> *%a1) { } define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) { +; GENERIC-LABEL: test_cvtps2dq: +; GENERIC: # BB#0: +; GENERIC-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [7:1.00] +; GENERIC-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_cvtps2dq: ; SANDY: # BB#0: ; SANDY-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00] @@ -772,6 +919,12 @@ define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) { } define <4 x double> @test_divpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { +; GENERIC-LABEL: test_divpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:3.00] +; GENERIC-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [52:3.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_divpd: ; SANDY: # BB#0: ; SANDY-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:3.00] @@ -802,6 +955,12 @@ define <4 x double> @test_divpd(<4 x double> %a0, <4 x double> %a1, <4 x double> } define <8 x float> @test_divps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { +; GENERIC-LABEL: test_divps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [29:3.00] +; GENERIC-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [36:3.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_divps: ; SANDY: # BB#0: ; SANDY-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [29:3.00] @@ -832,6 +991,12 @@ define <8 x float> @test_divps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a } define <8 x float> @test_dpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { +; GENERIC-LABEL: test_dpps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00] +; GENERIC-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_dpps: ; SANDY: # BB#0: ; SANDY-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00] @@ -863,6 +1028,13 @@ define <8 x float> @test_dpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2 declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone define <4 x float> @test_extractf128(<8 x float> %a0, <8 x float> %a1, <4 x float> *%a2) { +; GENERIC-LABEL: test_extractf128: +; GENERIC: # BB#0: +; GENERIC-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_extractf128: ; SANDY: # BB#0: ; SANDY-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00] @@ -896,6 +1068,12 @@ define <4 x float> @test_extractf128(<8 x float> %a0, <8 x float> %a1, <4 x floa } define <4 x double> @test_haddpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { +; GENERIC-LABEL: test_haddpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_haddpd: ; SANDY: # BB#0: ; SANDY-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] @@ -927,6 +1105,12 @@ define <4 x double> @test_haddpd(<4 x double> %a0, <4 x double> %a1, <4 x double declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounwind readnone define <8 x float> @test_haddps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { +; GENERIC-LABEL: test_haddps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; GENERIC-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_haddps: ; SANDY: # BB#0: ; SANDY-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] @@ -958,6 +1142,12 @@ define <8 x float> @test_haddps(<8 x float> %a0, <8 x float> %a1, <8 x float> *% declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind readnone define <4 x double> @test_hsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { +; GENERIC-LABEL: test_hsubpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; GENERIC-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_hsubpd: ; SANDY: # BB#0: ; SANDY-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] @@ -989,6 +1179,12 @@ define <4 x double> @test_hsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounwind readnone define <8 x float> @test_hsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { +; GENERIC-LABEL: test_hsubps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] +; GENERIC-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_hsubps: ; SANDY: # BB#0: ; SANDY-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] @@ -1020,6 +1216,13 @@ define <8 x float> @test_hsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *% declare <8 x float> @llvm.x86.avx.hsub.ps.256(<8 x float>, <8 x float>) nounwind readnone define <8 x float> @test_insertf128(<8 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { +; GENERIC-LABEL: test_insertf128: +; GENERIC: # BB#0: +; GENERIC-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00] +; GENERIC-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_insertf128: ; SANDY: # BB#0: ; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00] @@ -1057,6 +1260,11 @@ define <8 x float> @test_insertf128(<8 x float> %a0, <4 x float> %a1, <4 x float } define <32 x i8> @test_lddqu(i8* %a0) { +; GENERIC-LABEL: test_lddqu: +; GENERIC: # BB#0: +; GENERIC-NEXT: vlddqu (%rdi), %ymm0 # sched: [6:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_lddqu: ; SANDY: # BB#0: ; SANDY-NEXT: vlddqu (%rdi), %ymm0 # sched: [6:0.50] @@ -1082,6 +1290,13 @@ define <32 x i8> @test_lddqu(i8* %a0) { declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly define <2 x double> @test_maskmovpd(i8* %a0, <2 x i64> %a1, <2 x double> %a2) { +; GENERIC-LABEL: test_maskmovpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:2.00] +; GENERIC-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_maskmovpd: ; SANDY: # BB#0: ; SANDY-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:2.00] @@ -1117,6 +1332,13 @@ declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>) nounwind readonly declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>) nounwind define <4 x double> @test_maskmovpd_ymm(i8* %a0, <4 x i64> %a1, <4 x double> %a2) { +; GENERIC-LABEL: test_maskmovpd_ymm: +; GENERIC: # BB#0: +; GENERIC-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [5:1.00] +; GENERIC-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) +; GENERIC-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_maskmovpd_ymm: ; SANDY: # BB#0: ; SANDY-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [5:1.00] @@ -1152,6 +1374,13 @@ declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x i64>) nounwind read declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>) nounwind define <4 x float> @test_maskmovps(i8* %a0, <4 x i32> %a1, <4 x float> %a2) { +; GENERIC-LABEL: test_maskmovps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:2.00] +; GENERIC-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_maskmovps: ; SANDY: # BB#0: ; SANDY-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:2.00] @@ -1187,6 +1416,13 @@ declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>) nounwind readonly declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>) nounwind define <8 x float> @test_maskmovps_ymm(i8* %a0, <8 x i32> %a1, <8 x float> %a2) { +; GENERIC-LABEL: test_maskmovps_ymm: +; GENERIC: # BB#0: +; GENERIC-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [1:0.50] +; GENERIC-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) +; GENERIC-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_maskmovps_ymm: ; SANDY: # BB#0: ; SANDY-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [1:0.50] @@ -1222,6 +1458,12 @@ declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>) nounwind reado declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>) nounwind define <4 x double> @test_maxpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { +; GENERIC-LABEL: test_maxpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_maxpd: ; SANDY: # BB#0: ; SANDY-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] @@ -1253,6 +1495,12 @@ define <4 x double> @test_maxpd(<4 x double> %a0, <4 x double> %a1, <4 x double> declare <4 x double> @llvm.x86.avx.max.pd.256(<4 x double>, <4 x double>) nounwind readnone define <8 x float> @test_maxps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { +; GENERIC-LABEL: test_maxps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_maxps: ; SANDY: # BB#0: ; SANDY-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] @@ -1284,6 +1532,12 @@ define <8 x float> @test_maxps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a declare <8 x float> @llvm.x86.avx.max.ps.256(<8 x float>, <8 x float>) nounwind readnone define <4 x double> @test_minpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { +; GENERIC-LABEL: test_minpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_minpd: ; SANDY: # BB#0: ; SANDY-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] @@ -1315,6 +1569,12 @@ define <4 x double> @test_minpd(<4 x double> %a0, <4 x double> %a1, <4 x double> declare <4 x double> @llvm.x86.avx.min.pd.256(<4 x double>, <4 x double>) nounwind readnone define <8 x float> @test_minps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { +; GENERIC-LABEL: test_minps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_minps: ; SANDY: # BB#0: ; SANDY-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] @@ -1346,6 +1606,13 @@ define <8 x float> @test_minps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind readnone define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) { +; GENERIC-LABEL: test_movapd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50] +; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vmovapd %ymm0, (%rsi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movapd: ; SANDY: # BB#0: ; SANDY-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50] @@ -1380,6 +1647,13 @@ define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) { } define <8 x float> @test_movaps(<8 x float> *%a0, <8 x float> *%a1) { +; GENERIC-LABEL: test_movaps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50] +; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vmovaps %ymm0, (%rsi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movaps: ; SANDY: # BB#0: ; SANDY-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50] @@ -1414,6 +1688,13 @@ define <8 x float> @test_movaps(<8 x float> *%a0, <8 x float> *%a1) { } define <4 x double> @test_movddup(<4 x double> %a0, <4 x double> *%a1) { +; GENERIC-LABEL: test_movddup: +; GENERIC: # BB#0: +; GENERIC-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] +; GENERIC-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [7:0.50] +; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movddup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] @@ -1449,6 +1730,12 @@ define <4 x double> @test_movddup(<4 x double> %a0, <4 x double> *%a1) { } define i32 @test_movmskpd(<4 x double> %a0) { +; GENERIC-LABEL: test_movmskpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movmskpd: ; SANDY: # BB#0: ; SANDY-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] @@ -1477,6 +1764,12 @@ define i32 @test_movmskpd(<4 x double> %a0) { declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone define i32 @test_movmskps(<8 x float> %a0) { +; GENERIC-LABEL: test_movmskps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00] +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movmskps: ; SANDY: # BB#0: ; SANDY-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00] @@ -1505,6 +1798,12 @@ define i32 @test_movmskps(<8 x float> %a0) { declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone define <4 x double> @test_movntpd(<4 x double> %a0, <4 x double> *%a1) { +; GENERIC-LABEL: test_movntpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vmovntpd %ymm0, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movntpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] @@ -1534,6 +1833,12 @@ define <4 x double> @test_movntpd(<4 x double> %a0, <4 x double> *%a1) { } define <8 x float> @test_movntps(<8 x float> %a0, <8 x float> *%a1) { +; GENERIC-LABEL: test_movntps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vmovntps %ymm0, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movntps: ; SANDY: # BB#0: ; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] @@ -1563,6 +1868,13 @@ define <8 x float> @test_movntps(<8 x float> %a0, <8 x float> *%a1) { } define <8 x float> @test_movshdup(<8 x float> %a0, <8 x float> *%a1) { +; GENERIC-LABEL: test_movshdup: +; GENERIC: # BB#0: +; GENERIC-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] +; GENERIC-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50] +; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movshdup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] @@ -1598,6 +1910,13 @@ define <8 x float> @test_movshdup(<8 x float> %a0, <8 x float> *%a1) { } define <8 x float> @test_movsldup(<8 x float> %a0, <8 x float> *%a1) { +; GENERIC-LABEL: test_movsldup: +; GENERIC: # BB#0: +; GENERIC-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] +; GENERIC-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50] +; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movsldup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] @@ -1633,6 +1952,13 @@ define <8 x float> @test_movsldup(<8 x float> %a0, <8 x float> *%a1) { } define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) { +; GENERIC-LABEL: test_movupd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vmovupd (%rdi), %ymm0 # sched: [7:0.50] +; GENERIC-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vmovupd %ymm0, (%rsi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movupd: ; SANDY: # BB#0: ; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] @@ -1669,6 +1995,13 @@ define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) { } define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) { +; GENERIC-LABEL: test_movups: +; GENERIC: # BB#0: +; GENERIC-NEXT: vmovups (%rdi), %ymm0 # sched: [7:0.50] +; GENERIC-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vmovups %ymm0, (%rsi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_movups: ; SANDY: # BB#0: ; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] @@ -1705,6 +2038,12 @@ define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) { } define <4 x double> @test_mulpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { +; GENERIC-LABEL: test_mulpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [12:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_mulpd: ; SANDY: # BB#0: ; SANDY-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] @@ -1735,6 +2074,12 @@ define <4 x double> @test_mulpd(<4 x double> %a0, <4 x double> %a1, <4 x double> } define <8 x float> @test_mulps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { +; GENERIC-LABEL: test_mulps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [12:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_mulps: ; SANDY: # BB#0: ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] @@ -1765,6 +2110,13 @@ define <8 x float> @test_mulps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a } define <4 x double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { +; GENERIC-LABEL: orpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: orpd: ; SANDY: # BB#0: ; SANDY-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] @@ -1804,6 +2156,13 @@ define <4 x double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) } define <8 x float> @test_orps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { +; GENERIC-LABEL: test_orps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_orps: ; SANDY: # BB#0: ; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] @@ -1843,6 +2202,13 @@ define <8 x float> @test_orps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2 } define <2 x double> @test_permilpd(<2 x double> %a0, <2 x double> *%a1) { +; GENERIC-LABEL: test_permilpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] +; GENERIC-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00] +; GENERIC-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_permilpd: ; SANDY: # BB#0: ; SANDY-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] @@ -1878,6 +2244,13 @@ define <2 x double> @test_permilpd(<2 x double> %a0, <2 x double> *%a1) { } define <4 x double> @test_permilpd_ymm(<4 x double> %a0, <4 x double> *%a1) { +; GENERIC-LABEL: test_permilpd_ymm: +; GENERIC: # BB#0: +; GENERIC-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [8:1.00] +; GENERIC-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [5:1.00] +; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_permilpd_ymm: ; SANDY: # BB#0: ; SANDY-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [8:1.00] @@ -1913,6 +2286,13 @@ define <4 x double> @test_permilpd_ymm(<4 x double> %a0, <4 x double> *%a1) { } define <4 x float> @test_permilps(<4 x float> %a0, <4 x float> *%a1) { +; GENERIC-LABEL: test_permilps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] +; GENERIC-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] +; GENERIC-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_permilps: ; SANDY: # BB#0: ; SANDY-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] @@ -1948,6 +2328,13 @@ define <4 x float> @test_permilps(<4 x float> %a0, <4 x float> *%a1) { } define <8 x float> @test_permilps_ymm(<8 x float> %a0, <8 x float> *%a1) { +; GENERIC-LABEL: test_permilps_ymm: +; GENERIC: # BB#0: +; GENERIC-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [8:1.00] +; GENERIC-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [5:1.00] +; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_permilps_ymm: ; SANDY: # BB#0: ; SANDY-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [8:1.00] @@ -1983,6 +2370,12 @@ define <8 x float> @test_permilps_ymm(<8 x float> %a0, <8 x float> *%a1) { } define <2 x double> @test_permilvarpd(<2 x double> %a0, <2 x i64> %a1, <2 x i64> *%a2) { +; GENERIC-LABEL: test_permilvarpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_permilvarpd: ; SANDY: # BB#0: ; SANDY-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -2014,6 +2407,12 @@ define <2 x double> @test_permilvarpd(<2 x double> %a0, <2 x i64> %a1, <2 x i64> declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwind readnone define <4 x double> @test_permilvarpd_ymm(<4 x double> %a0, <4 x i64> %a1, <4 x i64> *%a2) { +; GENERIC-LABEL: test_permilvarpd_ymm: +; GENERIC: # BB#0: +; GENERIC-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_permilvarpd_ymm: ; SANDY: # BB#0: ; SANDY-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] @@ -2045,6 +2444,12 @@ define <4 x double> @test_permilvarpd_ymm(<4 x double> %a0, <4 x i64> %a1, <4 x declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>) nounwind readnone define <4 x float> @test_permilvarps(<4 x float> %a0, <4 x i32> %a1, <4 x i32> *%a2) { +; GENERIC-LABEL: test_permilvarps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_permilvarps: ; SANDY: # BB#0: ; SANDY-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] @@ -2076,6 +2481,12 @@ define <4 x float> @test_permilvarps(<4 x float> %a0, <4 x i32> %a1, <4 x i32> * declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind readnone define <8 x float> @test_permilvarps_ymm(<8 x float> %a0, <8 x i32> %a1, <8 x i32> *%a2) { +; GENERIC-LABEL: test_permilvarps_ymm: +; GENERIC: # BB#0: +; GENERIC-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_permilvarps_ymm: ; SANDY: # BB#0: ; SANDY-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] @@ -2107,6 +2518,13 @@ define <8 x float> @test_permilvarps_ymm(<8 x float> %a0, <8 x i32> %a1, <8 x i3 declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>) nounwind readnone define <8 x float> @test_rcpps(<8 x float> %a0, <8 x float> *%a1) { +; GENERIC-LABEL: test_rcpps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vrcpps (%rdi), %ymm1 # sched: [9:1.00] +; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_rcpps: ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00] @@ -2143,6 +2561,13 @@ define <8 x float> @test_rcpps(<8 x float> %a0, <8 x float> *%a1) { declare <8 x float> @llvm.x86.avx.rcp.ps.256(<8 x float>) nounwind readnone define <4 x double> @test_roundpd(<4 x double> %a0, <4 x double> *%a1) { +; GENERIC-LABEL: test_roundpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [7:1.00] +; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_roundpd: ; SANDY: # BB#0: ; SANDY-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00] @@ -2179,6 +2604,13 @@ define <4 x double> @test_roundpd(<4 x double> %a0, <4 x double> *%a1) { declare <4 x double> @llvm.x86.avx.round.pd.256(<4 x double>, i32) nounwind readnone define <8 x float> @test_roundps(<8 x float> %a0, <8 x float> *%a1) { +; GENERIC-LABEL: test_roundps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [7:1.00] +; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_roundps: ; SANDY: # BB#0: ; SANDY-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00] @@ -2215,6 +2647,13 @@ define <8 x float> @test_roundps(<8 x float> %a0, <8 x float> *%a1) { declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readnone define <8 x float> @test_rsqrtps(<8 x float> %a0, <8 x float> *%a1) { +; GENERIC-LABEL: test_rsqrtps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [14:3.00] +; GENERIC-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [7:3.00] +; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_rsqrtps: ; SANDY: # BB#0: ; SANDY-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [14:3.00] @@ -2251,6 +2690,13 @@ define <8 x float> @test_rsqrtps(<8 x float> %a0, <8 x float> *%a1) { declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone define <4 x double> @test_shufpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { +; GENERIC-LABEL: test_shufpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] +; GENERIC-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00] +; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_shufpd: ; SANDY: # BB#0: ; SANDY-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] @@ -2286,6 +2732,12 @@ define <4 x double> @test_shufpd(<4 x double> %a0, <4 x double> %a1, <4 x double } define <8 x float> @test_shufps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) nounwind { +; GENERIC-LABEL: test_shufps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] +; GENERIC-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [8:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_shufps: ; SANDY: # BB#0: ; SANDY-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] @@ -2316,6 +2768,13 @@ define <8 x float> @test_shufps(<8 x float> %a0, <8 x float> %a1, <8 x float> *% } define <4 x double> @test_sqrtpd(<4 x double> %a0, <4 x double> *%a1) { +; GENERIC-LABEL: test_sqrtpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [52:3.00] +; GENERIC-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [45:3.00] +; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_sqrtpd: ; SANDY: # BB#0: ; SANDY-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [52:3.00] @@ -2352,6 +2811,13 @@ define <4 x double> @test_sqrtpd(<4 x double> %a0, <4 x double> *%a1) { declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone define <8 x float> @test_sqrtps(<8 x float> %a0, <8 x float> *%a1) { +; GENERIC-LABEL: test_sqrtps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vsqrtps (%rdi), %ymm1 # sched: [36:3.00] +; GENERIC-NEXT: vsqrtps %ymm0, %ymm0 # sched: [29:3.00] +; GENERIC-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_sqrtps: ; SANDY: # BB#0: ; SANDY-NEXT: vsqrtps (%rdi), %ymm1 # sched: [36:3.00] @@ -2388,6 +2854,12 @@ define <8 x float> @test_sqrtps(<8 x float> %a0, <8 x float> *%a1) { declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone define <4 x double> @test_subpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { +; GENERIC-LABEL: test_subpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_subpd: ; SANDY: # BB#0: ; SANDY-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] @@ -2418,6 +2890,12 @@ define <4 x double> @test_subpd(<4 x double> %a0, <4 x double> %a1, <4 x double> } define <8 x float> @test_subps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { +; GENERIC-LABEL: test_subps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [10:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_subps: ; SANDY: # BB#0: ; SANDY-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] @@ -2448,6 +2926,15 @@ define <8 x float> @test_subps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a } define i32 @test_testpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { +; GENERIC-LABEL: test_testpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: xorl %eax, %eax # sched: [1:0.33] +; GENERIC-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: setb %al # sched: [1:1.00] +; GENERIC-NEXT: vtestpd (%rdi), %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: adcl $0, %eax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_testpd: ; SANDY: # BB#0: ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33] @@ -2492,6 +2979,16 @@ define i32 @test_testpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnone define i32 @test_testpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { +; GENERIC-LABEL: test_testpd_ymm: +; GENERIC: # BB#0: +; GENERIC-NEXT: xorl %eax, %eax # sched: [1:0.33] +; GENERIC-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: setb %al # sched: [1:1.00] +; GENERIC-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:1.00] +; GENERIC-NEXT: adcl $0, %eax # sched: [1:0.33] +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_testpd_ymm: ; SANDY: # BB#0: ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33] @@ -2539,6 +3036,15 @@ define i32 @test_testpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind readnone define i32 @test_testps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { +; GENERIC-LABEL: test_testps: +; GENERIC: # BB#0: +; GENERIC-NEXT: xorl %eax, %eax # sched: [1:0.33] +; GENERIC-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: setb %al # sched: [1:1.00] +; GENERIC-NEXT: vtestps (%rdi), %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: adcl $0, %eax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_testps: ; SANDY: # BB#0: ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33] @@ -2583,6 +3089,16 @@ define i32 @test_testps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone define i32 @test_testps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { +; GENERIC-LABEL: test_testps_ymm: +; GENERIC: # BB#0: +; GENERIC-NEXT: xorl %eax, %eax # sched: [1:0.33] +; GENERIC-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: setb %al # sched: [1:1.00] +; GENERIC-NEXT: vtestps (%rdi), %ymm0 # sched: [8:1.00] +; GENERIC-NEXT: adcl $0, %eax # sched: [1:0.33] +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_testps_ymm: ; SANDY: # BB#0: ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33] @@ -2630,6 +3146,13 @@ define i32 @test_testps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readnone define <4 x double> @test_unpckhpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { +; GENERIC-LABEL: test_unpckhpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] +; GENERIC-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [5:1.00] +; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_unpckhpd: ; SANDY: # BB#0: ; SANDY-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] @@ -2665,6 +3188,12 @@ define <4 x double> @test_unpckhpd(<4 x double> %a0, <4 x double> %a1, <4 x doub } define <8 x float> @test_unpckhps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) nounwind { +; GENERIC-LABEL: test_unpckhps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] +; GENERIC-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_unpckhps: ; SANDY: # BB#0: ; SANDY-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] @@ -2695,6 +3224,13 @@ define <8 x float> @test_unpckhps(<8 x float> %a0, <8 x float> %a1, <8 x float> } define <4 x double> @test_unpcklpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { +; GENERIC-LABEL: test_unpcklpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] +; GENERIC-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00] +; GENERIC-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_unpcklpd: ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] @@ -2730,6 +3266,12 @@ define <4 x double> @test_unpcklpd(<4 x double> %a0, <4 x double> %a1, <4 x doub } define <8 x float> @test_unpcklps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) nounwind { +; GENERIC-LABEL: test_unpcklps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] +; GENERIC-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_unpcklps: ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] @@ -2760,6 +3302,13 @@ define <8 x float> @test_unpcklps(<8 x float> %a0, <8 x float> %a1, <8 x float> } define <4 x double> @test_xorpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { +; GENERIC-LABEL: test_xorpd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_xorpd: ; SANDY: # BB#0: ; SANDY-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] @@ -2799,6 +3348,13 @@ define <4 x double> @test_xorpd(<4 x double> %a0, <4 x double> %a1, <4 x double> } define <8 x float> @test_xorps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { +; GENERIC-LABEL: test_xorps: +; GENERIC: # BB#0: +; GENERIC-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_xorps: ; SANDY: # BB#0: ; SANDY-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] @@ -2838,6 +3394,11 @@ define <8 x float> @test_xorps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a } define void @test_zeroall() { +; GENERIC-LABEL: test_zeroall: +; GENERIC: # BB#0: +; GENERIC-NEXT: vzeroall +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_zeroall: ; SANDY: # BB#0: ; SANDY-NEXT: vzeroall @@ -2863,6 +3424,11 @@ define void @test_zeroall() { declare void @llvm.x86.avx.vzeroall() nounwind define void @test_zeroupper() { +; GENERIC-LABEL: test_zeroupper: +; GENERIC: # BB#0: +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; SANDY-LABEL: test_zeroupper: ; SANDY: # BB#0: ; SANDY-NEXT: vzeroupper diff --git a/test/CodeGen/X86/avx2-schedule.ll b/test/CodeGen/X86/avx2-schedule.ll index a3862d7e27c..971ebe5e711 100644 --- a/test/CodeGen/X86/avx2-schedule.ll +++ b/test/CodeGen/X86/avx2-schedule.ll @@ -1,9 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) { +; GENERIC-LABEL: test_pabsb: +; GENERIC: # BB#0: +; GENERIC-NEXT: vpabsb %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vpabsb (%rdi), %ymm1 # sched: [7:1.00] +; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; HASWELL-LABEL: test_pabsb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50] @@ -26,6 +34,13 @@ define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) { declare <32 x i8> @llvm.x86.avx2.pabs.b(<32 x i8>) nounwind readnone define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) { +; GENERIC-LABEL: test_pabsd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vpabsd %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vpabsd (%rdi), %ymm1 # sched: [7:1.00] +; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; HASWELL-LABEL: test_pabsd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50] @@ -48,6 +63,13 @@ define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) { declare <8 x i32> @llvm.x86.avx2.pabs.d(<8 x i32>) nounwind readnone define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) { +; GENERIC-LABEL: test_pabsw: +; GENERIC: # BB#0: +; GENERIC-NEXT: vpabsw %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vpabsw (%rdi), %ymm1 # sched: [7:1.00] +; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; HASWELL-LABEL: test_pabsw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50] @@ -70,6 +92,12 @@ define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) { declare <16 x i16> @llvm.x86.avx2.pabs.w(<16 x i16>) nounwind readnone define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { +; GENERIC-LABEL: test_paddb: +; GENERIC: # BB#0: +; GENERIC-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; HASWELL-LABEL: test_paddb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -88,6 +116,12 @@ define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { } define <8 x i32> @test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { +; GENERIC-LABEL: test_paddd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; HASWELL-LABEL: test_paddd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -106,6 +140,12 @@ define <8 x i32> @test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { } define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { +; GENERIC-LABEL: test_paddq: +; GENERIC: # BB#0: +; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; HASWELL-LABEL: test_paddq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -124,6 +164,12 @@ define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { } define <16 x i16> @test_paddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { +; GENERIC-LABEL: test_paddw: +; GENERIC: # BB#0: +; GENERIC-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; HASWELL-LABEL: test_paddw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -142,6 +188,13 @@ define <16 x i16> @test_paddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { } define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { +; GENERIC-LABEL: test_pand: +; GENERIC: # BB#0: +; GENERIC-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; HASWELL-LABEL: test_pand: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] @@ -163,6 +216,13 @@ define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { } define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { +; GENERIC-LABEL: test_pandn: +; GENERIC: # BB#0: +; GENERIC-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [5:1.00] +; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; HASWELL-LABEL: test_pandn: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] @@ -186,6 +246,12 @@ define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { } define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { +; GENERIC-LABEL: test_pmulld: +; GENERIC: # BB#0: +; GENERIC-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; HASWELL-LABEL: test_pmulld: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00] @@ -204,6 +270,12 @@ define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { } define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { +; GENERIC-LABEL: test_pmullw: +; GENERIC: # BB#0: +; GENERIC-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; HASWELL-LABEL: test_pmullw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] @@ -222,6 +294,13 @@ define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) } define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { +; GENERIC-LABEL: test_por: +; GENERIC: # BB#0: +; GENERIC-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; HASWELL-LABEL: test_por: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] @@ -243,6 +322,12 @@ define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { } define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { +; GENERIC-LABEL: test_psubb: +; GENERIC: # BB#0: +; GENERIC-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; HASWELL-LABEL: test_psubb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -261,6 +346,12 @@ define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { } define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { +; GENERIC-LABEL: test_psubd: +; GENERIC: # BB#0: +; GENERIC-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; HASWELL-LABEL: test_psubd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -279,6 +370,12 @@ define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { } define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { +; GENERIC-LABEL: test_psubq: +; GENERIC: # BB#0: +; GENERIC-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; HASWELL-LABEL: test_psubq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -297,6 +394,12 @@ define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { } define <16 x i16> @test_psubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { +; GENERIC-LABEL: test_psubw: +; GENERIC: # BB#0: +; GENERIC-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; HASWELL-LABEL: test_psubw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] @@ -315,6 +418,13 @@ define <16 x i16> @test_psubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { } define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { +; GENERIC-LABEL: test_pxor: +; GENERIC: # BB#0: +; GENERIC-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; GENERIC-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [5:1.00] +; GENERIC-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; HASWELL-LABEL: test_pxor: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] diff --git a/test/CodeGen/X86/bmi-schedule.ll b/test/CodeGen/X86/bmi-schedule.ll index 87051150756..dd332fcef8a 100644 --- a/test/CodeGen/X86/bmi-schedule.ll +++ b/test/CodeGen/X86/bmi-schedule.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+bmi | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+bmi | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL @@ -9,12 +9,12 @@ define i16 @test_andn_i16(i16 zeroext %a0, i16 zeroext %a1, i16 *%a2) { ; GENERIC-LABEL: test_andn_i16: ; GENERIC: # BB#0: -; GENERIC-NEXT: andnl %esi, %edi, %eax -; GENERIC-NEXT: notl %edi -; GENERIC-NEXT: andw (%rdx), %di -; GENERIC-NEXT: addl %edi, %eax +; GENERIC-NEXT: andnl %esi, %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: notl %edi # sched: [1:0.33] +; GENERIC-NEXT: andw (%rdx), %di # sched: [5:0.50] +; GENERIC-NEXT: addl %edi, %eax # sched: [1:0.33] ; GENERIC-NEXT: # kill: %AX %AX %EAX -; GENERIC-NEXT: retq +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_andn_i16: ; HASWELL: # BB#0: @@ -53,10 +53,10 @@ define i16 @test_andn_i16(i16 zeroext %a0, i16 zeroext %a1, i16 *%a2) { define i32 @test_andn_i32(i32 %a0, i32 %a1, i32 *%a2) { ; GENERIC-LABEL: test_andn_i32: ; GENERIC: # BB#0: -; GENERIC-NEXT: andnl %esi, %edi, %ecx -; GENERIC-NEXT: andnl (%rdx), %edi, %eax -; GENERIC-NEXT: addl %ecx, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: andnl %esi, %edi, %ecx # sched: [1:0.33] +; GENERIC-NEXT: andnl (%rdx), %edi, %eax # sched: [5:0.50] +; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_andn_i32: ; HASWELL: # BB#0: @@ -89,10 +89,10 @@ define i32 @test_andn_i32(i32 %a0, i32 %a1, i32 *%a2) { define i64 @test_andn_i64(i64 %a0, i64 %a1, i64 *%a2) { ; GENERIC-LABEL: test_andn_i64: ; GENERIC: # BB#0: -; GENERIC-NEXT: andnq %rsi, %rdi, %rcx -; GENERIC-NEXT: andnq (%rdx), %rdi, %rax -; GENERIC-NEXT: addq %rcx, %rax -; GENERIC-NEXT: retq +; GENERIC-NEXT: andnq %rsi, %rdi, %rcx # sched: [1:0.33] +; GENERIC-NEXT: andnq (%rdx), %rdi, %rax # sched: [5:0.50] +; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_andn_i64: ; HASWELL: # BB#0: @@ -127,8 +127,8 @@ define i32 @test_bextr_i32(i32 %a0, i32 %a1, i32 *%a2) { ; GENERIC: # BB#0: ; GENERIC-NEXT: bextrl %edi, (%rdx), %ecx ; GENERIC-NEXT: bextrl %edi, %esi, %eax -; GENERIC-NEXT: addl %ecx, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_bextr_i32: ; HASWELL: # BB#0: @@ -163,8 +163,8 @@ define i64 @test_bextr_i64(i64 %a0, i64 %a1, i64 *%a2) { ; GENERIC: # BB#0: ; GENERIC-NEXT: bextrq %rdi, (%rdx), %rcx ; GENERIC-NEXT: bextrq %rdi, %rsi, %rax -; GENERIC-NEXT: addq %rcx, %rax -; GENERIC-NEXT: retq +; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_bextr_i64: ; HASWELL: # BB#0: @@ -199,8 +199,8 @@ define i32 @test_blsi_i32(i32 %a0, i32 *%a1) { ; GENERIC: # BB#0: ; GENERIC-NEXT: blsil (%rsi), %ecx ; GENERIC-NEXT: blsil %edi, %eax -; GENERIC-NEXT: addl %ecx, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_blsi_i32: ; HASWELL: # BB#0: @@ -236,8 +236,8 @@ define i64 @test_blsi_i64(i64 %a0, i64 *%a1) { ; GENERIC: # BB#0: ; GENERIC-NEXT: blsiq (%rsi), %rcx ; GENERIC-NEXT: blsiq %rdi, %rax -; GENERIC-NEXT: addq %rcx, %rax -; GENERIC-NEXT: retq +; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_blsi_i64: ; HASWELL: # BB#0: @@ -273,8 +273,8 @@ define i32 @test_blsmsk_i32(i32 %a0, i32 *%a1) { ; GENERIC: # BB#0: ; GENERIC-NEXT: blsmskl (%rsi), %ecx ; GENERIC-NEXT: blsmskl %edi, %eax -; GENERIC-NEXT: addl %ecx, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_blsmsk_i32: ; HASWELL: # BB#0: @@ -310,8 +310,8 @@ define i64 @test_blsmsk_i64(i64 %a0, i64 *%a1) { ; GENERIC: # BB#0: ; GENERIC-NEXT: blsmskq (%rsi), %rcx ; GENERIC-NEXT: blsmskq %rdi, %rax -; GENERIC-NEXT: addq %rcx, %rax -; GENERIC-NEXT: retq +; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_blsmsk_i64: ; HASWELL: # BB#0: @@ -347,8 +347,8 @@ define i32 @test_blsr_i32(i32 %a0, i32 *%a1) { ; GENERIC: # BB#0: ; GENERIC-NEXT: blsrl (%rsi), %ecx ; GENERIC-NEXT: blsrl %edi, %eax -; GENERIC-NEXT: addl %ecx, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_blsr_i32: ; HASWELL: # BB#0: @@ -384,8 +384,8 @@ define i64 @test_blsr_i64(i64 %a0, i64 *%a1) { ; GENERIC: # BB#0: ; GENERIC-NEXT: blsrq (%rsi), %rcx ; GENERIC-NEXT: blsrq %rdi, %rax -; GENERIC-NEXT: addq %rcx, %rax -; GENERIC-NEXT: retq +; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_blsr_i64: ; HASWELL: # BB#0: @@ -421,9 +421,9 @@ define i16 @test_cttz_i16(i16 zeroext %a0, i16 *%a1) { ; GENERIC: # BB#0: ; GENERIC-NEXT: tzcntw (%rsi), %cx ; GENERIC-NEXT: tzcntw %di, %ax -; GENERIC-NEXT: orl %ecx, %eax +; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: # kill: %AX %AX %EAX -; GENERIC-NEXT: retq +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cttz_i16: ; HASWELL: # BB#0: @@ -461,8 +461,8 @@ define i32 @test_cttz_i32(i32 %a0, i32 *%a1) { ; GENERIC: # BB#0: ; GENERIC-NEXT: tzcntl (%rsi), %ecx ; GENERIC-NEXT: tzcntl %edi, %eax -; GENERIC-NEXT: orl %ecx, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cttz_i32: ; HASWELL: # BB#0: @@ -497,8 +497,8 @@ define i64 @test_cttz_i64(i64 %a0, i64 *%a1) { ; GENERIC: # BB#0: ; GENERIC-NEXT: tzcntq (%rsi), %rcx ; GENERIC-NEXT: tzcntq %rdi, %rax -; GENERIC-NEXT: orq %rcx, %rax -; GENERIC-NEXT: retq +; GENERIC-NEXT: orq %rcx, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cttz_i64: ; HASWELL: # BB#0: diff --git a/test/CodeGen/X86/bmi2-schedule.ll b/test/CodeGen/X86/bmi2-schedule.ll index 02ec6f041ce..e62edb1aec6 100644 --- a/test/CodeGen/X86/bmi2-schedule.ll +++ b/test/CodeGen/X86/bmi2-schedule.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+bmi2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+bmi2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL @@ -10,8 +10,8 @@ define i32 @test_bzhi_i32(i32 %a0, i32 %a1, i32 *%a2) { ; GENERIC: # BB#0: ; GENERIC-NEXT: bzhil %edi, (%rdx), %ecx ; GENERIC-NEXT: bzhil %edi, %esi, %eax -; GENERIC-NEXT: addl %ecx, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_bzhi_i32: ; HASWELL: # BB#0: @@ -39,8 +39,8 @@ define i64 @test_bzhi_i64(i64 %a0, i64 %a1, i64 *%a2) { ; GENERIC: # BB#0: ; GENERIC-NEXT: bzhiq %rdi, (%rdx), %rcx ; GENERIC-NEXT: bzhiq %rdi, %rsi, %rax -; GENERIC-NEXT: addq %rcx, %rax -; GENERIC-NEXT: retq +; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_bzhi_i64: ; HASWELL: # BB#0: @@ -68,8 +68,8 @@ define i32 @test_pdep_i32(i32 %a0, i32 %a1, i32 *%a2) { ; GENERIC: # BB#0: ; GENERIC-NEXT: pdepl (%rdx), %edi, %ecx ; GENERIC-NEXT: pdepl %esi, %edi, %eax -; GENERIC-NEXT: addl %ecx, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pdep_i32: ; HASWELL: # BB#0: @@ -97,8 +97,8 @@ define i64 @test_pdep_i64(i64 %a0, i64 %a1, i64 *%a2) { ; GENERIC: # BB#0: ; GENERIC-NEXT: pdepq (%rdx), %rdi, %rcx ; GENERIC-NEXT: pdepq %rsi, %rdi, %rax -; GENERIC-NEXT: addq %rcx, %rax -; GENERIC-NEXT: retq +; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pdep_i64: ; HASWELL: # BB#0: @@ -126,8 +126,8 @@ define i32 @test_pext_i32(i32 %a0, i32 %a1, i32 *%a2) { ; GENERIC: # BB#0: ; GENERIC-NEXT: pextl (%rdx), %edi, %ecx ; GENERIC-NEXT: pextl %esi, %edi, %eax -; GENERIC-NEXT: addl %ecx, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pext_i32: ; HASWELL: # BB#0: @@ -155,8 +155,8 @@ define i64 @test_pext_i64(i64 %a0, i64 %a1, i64 *%a2) { ; GENERIC: # BB#0: ; GENERIC-NEXT: pextq (%rdx), %rdi, %rcx ; GENERIC-NEXT: pextq %rsi, %rdi, %rax -; GENERIC-NEXT: addq %rcx, %rax -; GENERIC-NEXT: retq +; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pext_i64: ; HASWELL: # BB#0: diff --git a/test/CodeGen/X86/f16c-schedule.ll b/test/CodeGen/X86/f16c-schedule.ll index f36937c3a45..1a17b8bdbca 100644 --- a/test/CodeGen/X86/f16c-schedule.ll +++ b/test/CodeGen/X86/f16c-schedule.ll @@ -1,4 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+f16c | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=IVY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL @@ -6,6 +7,13 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1 define <4 x float> @test_vcvtph2ps_128(<8 x i16> %a0, <8 x i16> *%a1) { +; GENERIC-LABEL: test_vcvtph2ps_128: +; GENERIC: # BB#0: +; GENERIC-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [7:1.00] +; GENERIC-NEXT: vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; IVY-LABEL: test_vcvtph2ps_128: ; IVY: # BB#0: ; IVY-NEXT: vcvtph2ps (%rdi), %xmm1 # sched: [7:1.00] @@ -42,6 +50,13 @@ define <4 x float> @test_vcvtph2ps_128(<8 x i16> %a0, <8 x i16> *%a1) { declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>) define <8 x float> @test_vcvtph2ps_256(<8 x i16> %a0, <8 x i16> *%a1) { +; GENERIC-LABEL: test_vcvtph2ps_256: +; GENERIC: # BB#0: +; GENERIC-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [7:1.00] +; GENERIC-NEXT: vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; IVY-LABEL: test_vcvtph2ps_256: ; IVY: # BB#0: ; IVY-NEXT: vcvtph2ps (%rdi), %ymm1 # sched: [7:1.00] @@ -78,6 +93,12 @@ define <8 x float> @test_vcvtph2ps_256(<8 x i16> %a0, <8 x i16> *%a1) { declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>) define <8 x i16> @test_vcvtps2ph_128(<4 x float> %a0, <4 x float> %a1, <4 x i16> *%a2) { +; GENERIC-LABEL: test_vcvtps2ph_128: +; GENERIC: # BB#0: +; GENERIC-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: vcvtps2ph $0, %xmm1, (%rdi) # sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; IVY-LABEL: test_vcvtps2ph_128: ; IVY: # BB#0: ; IVY-NEXT: vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00] @@ -110,6 +131,13 @@ define <8 x i16> @test_vcvtps2ph_128(<4 x float> %a0, <4 x float> %a1, <4 x i16> declare <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float>, i32) define <8 x i16> @test_vcvtps2ph_256(<8 x float> %a0, <8 x float> %a1, <8 x i16> *%a2) { +; GENERIC-LABEL: test_vcvtps2ph_256: +; GENERIC: # BB#0: +; GENERIC-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00] +; GENERIC-NEXT: vzeroupper +; GENERIC-NEXT: retq # sched: [1:1.00] +; ; IVY-LABEL: test_vcvtps2ph_256: ; IVY: # BB#0: ; IVY-NEXT: vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00] diff --git a/test/CodeGen/X86/lzcnt-schedule.ll b/test/CodeGen/X86/lzcnt-schedule.ll index cd0dcbbd6af..64441c7e895 100644 --- a/test/CodeGen/X86/lzcnt-schedule.ll +++ b/test/CodeGen/X86/lzcnt-schedule.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+lzcnt | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+lzcnt | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL @@ -11,9 +11,9 @@ define i16 @test_ctlz_i16(i16 zeroext %a0, i16 *%a1) { ; GENERIC: # BB#0: ; GENERIC-NEXT: lzcntw (%rsi), %cx ; GENERIC-NEXT: lzcntw %di, %ax -; GENERIC-NEXT: orl %ecx, %eax +; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: # kill: %AX %AX %EAX -; GENERIC-NEXT: retq +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_ctlz_i16: ; HASWELL: # BB#0: @@ -51,8 +51,8 @@ define i32 @test_ctlz_i32(i32 %a0, i32 *%a1) { ; GENERIC: # BB#0: ; GENERIC-NEXT: lzcntl (%rsi), %ecx ; GENERIC-NEXT: lzcntl %edi, %eax -; GENERIC-NEXT: orl %ecx, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_ctlz_i32: ; HASWELL: # BB#0: @@ -87,8 +87,8 @@ define i64 @test_ctlz_i64(i64 %a0, i64 *%a1) { ; GENERIC: # BB#0: ; GENERIC-NEXT: lzcntq (%rsi), %rcx ; GENERIC-NEXT: lzcntq %rdi, %rax -; GENERIC-NEXT: orq %rcx, %rax -; GENERIC-NEXT: retq +; GENERIC-NEXT: orq %rcx, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_ctlz_i64: ; HASWELL: # BB#0: diff --git a/test/CodeGen/X86/popcnt-schedule.ll b/test/CodeGen/X86/popcnt-schedule.ll index c0d11280fc1..4f0f21b711d 100644 --- a/test/CodeGen/X86/popcnt-schedule.ll +++ b/test/CodeGen/X86/popcnt-schedule.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+popcnt | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+popcnt | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=goldmont | FileCheck %s --check-prefix=CHECK --check-prefix=SLM ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY @@ -13,11 +13,11 @@ define i16 @test_ctpop_i16(i16 zeroext %a0, i16 *%a1) { ; GENERIC-LABEL: test_ctpop_i16: ; GENERIC: # BB#0: -; GENERIC-NEXT: popcntw (%rsi), %cx -; GENERIC-NEXT: popcntw %di, %ax -; GENERIC-NEXT: orl %ecx, %eax +; GENERIC-NEXT: popcntw (%rsi), %cx # sched: [7:1.00] +; GENERIC-NEXT: popcntw %di, %ax # sched: [3:1.00] +; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33] ; GENERIC-NEXT: # kill: %AX %AX %EAX -; GENERIC-NEXT: retq +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_ctpop_i16: ; SLM: # BB#0: @@ -69,10 +69,10 @@ declare i16 @llvm.ctpop.i16(i16) define i32 @test_ctpop_i32(i32 %a0, i32 *%a1) { ; GENERIC-LABEL: test_ctpop_i32: ; GENERIC: # BB#0: -; GENERIC-NEXT: popcntl (%rsi), %ecx -; GENERIC-NEXT: popcntl %edi, %eax -; GENERIC-NEXT: orl %ecx, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: popcntl (%rsi), %ecx # sched: [7:1.00] +; GENERIC-NEXT: popcntl %edi, %eax # sched: [3:1.00] +; GENERIC-NEXT: orl %ecx, %eax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_ctpop_i32: ; SLM: # BB#0: @@ -119,10 +119,10 @@ declare i32 @llvm.ctpop.i32(i32) define i64 @test_ctpop_i64(i64 %a0, i64 *%a1) { ; GENERIC-LABEL: test_ctpop_i64: ; GENERIC: # BB#0: -; GENERIC-NEXT: popcntq (%rsi), %rcx -; GENERIC-NEXT: popcntq %rdi, %rax -; GENERIC-NEXT: orq %rcx, %rax -; GENERIC-NEXT: retq +; GENERIC-NEXT: popcntq (%rsi), %rcx # sched: [9:1.00] +; GENERIC-NEXT: popcntq %rdi, %rax # sched: [3:1.00] +; GENERIC-NEXT: orq %rcx, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_ctpop_i64: ; SLM: # BB#0: diff --git a/test/CodeGen/X86/sse-schedule.ll b/test/CodeGen/X86/sse-schedule.ll index 4164cb16bbd..699451a02f6 100644 --- a/test/CodeGen/X86/sse-schedule.ll +++ b/test/CodeGen/X86/sse-schedule.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY @@ -12,9 +12,9 @@ define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_addps: ; GENERIC: # BB#0: -; GENERIC-NEXT: addps %xmm1, %xmm0 -; GENERIC-NEXT: addps (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: addps (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_addps: ; ATOM: # BB#0: @@ -60,9 +60,9 @@ define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a define float @test_addss(float %a0, float %a1, float *%a2) { ; GENERIC-LABEL: test_addss: ; GENERIC: # BB#0: -; GENERIC-NEXT: addss %xmm1, %xmm0 -; GENERIC-NEXT: addss (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: addss (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_addss: ; ATOM: # BB#0: @@ -108,9 +108,9 @@ define float @test_addss(float %a0, float %a1, float *%a2) { define <4 x float> @test_andps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_andps: ; GENERIC: # BB#0: -; GENERIC-NEXT: andps %xmm1, %xmm0 -; GENERIC-NEXT: andps (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: andps %xmm1, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: andps (%rdi), %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_andps: ; ATOM: # BB#0: @@ -168,9 +168,9 @@ define <4 x float> @test_andps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_andnotps: ; GENERIC: # BB#0: -; GENERIC-NEXT: andnps %xmm1, %xmm0 -; GENERIC-NEXT: andnps (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: andnps %xmm1, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: andnps (%rdi), %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_andnotps: ; ATOM: # BB#0: @@ -230,10 +230,10 @@ define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float> define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_cmpps: ; GENERIC: # BB#0: -; GENERIC-NEXT: cmpeqps %xmm0, %xmm1 -; GENERIC-NEXT: cmpeqps (%rdi), %xmm0 -; GENERIC-NEXT: orps %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: cmpeqps %xmm0, %xmm1 # sched: [3:1.00] +; GENERIC-NEXT: cmpeqps (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cmpps: ; ATOM: # BB#0: @@ -288,9 +288,9 @@ define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a define float @test_cmpss(float %a0, float %a1, float *%a2) { ; GENERIC-LABEL: test_cmpss: ; GENERIC: # BB#0: -; GENERIC-NEXT: cmpeqss %xmm1, %xmm0 -; GENERIC-NEXT: cmpeqss (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: cmpeqss %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: cmpeqss (%rdi), %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cmpss: ; ATOM: # BB#0: @@ -341,17 +341,17 @@ declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_comiss: ; GENERIC: # BB#0: -; GENERIC-NEXT: comiss %xmm1, %xmm0 -; GENERIC-NEXT: setnp %al -; GENERIC-NEXT: sete %cl -; GENERIC-NEXT: andb %al, %cl -; GENERIC-NEXT: comiss (%rdi), %xmm0 -; GENERIC-NEXT: setnp %al -; GENERIC-NEXT: sete %dl -; GENERIC-NEXT: andb %al, %dl -; GENERIC-NEXT: orb %cl, %dl -; GENERIC-NEXT: movzbl %dl, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: comiss %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: setnp %al # sched: [1:1.00] +; GENERIC-NEXT: sete %cl # sched: [1:1.00] +; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33] +; GENERIC-NEXT: comiss (%rdi), %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: setnp %al # sched: [1:1.00] +; GENERIC-NEXT: sete %dl # sched: [1:1.00] +; GENERIC-NEXT: andb %al, %dl # sched: [1:0.33] +; GENERIC-NEXT: orb %cl, %dl # sched: [1:0.33] +; GENERIC-NEXT: movzbl %dl, %eax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_comiss: ; ATOM: # BB#0: @@ -447,10 +447,10 @@ declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone define float @test_cvtsi2ss(i32 %a0, i32 *%a1) { ; GENERIC-LABEL: test_cvtsi2ss: ; GENERIC: # BB#0: -; GENERIC-NEXT: cvtsi2ssl %edi, %xmm1 -; GENERIC-NEXT: cvtsi2ssl (%rsi), %xmm0 -; GENERIC-NEXT: addss %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: cvtsi2ssl %edi, %xmm1 # sched: [5:2.00] +; GENERIC-NEXT: cvtsi2ssl (%rsi), %xmm0 # sched: [10:1.00] +; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvtsi2ss: ; ATOM: # BB#0: @@ -503,10 +503,10 @@ define float @test_cvtsi2ss(i32 %a0, i32 *%a1) { define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) { ; GENERIC-LABEL: test_cvtsi2ssq: ; GENERIC: # BB#0: -; GENERIC-NEXT: cvtsi2ssq %rdi, %xmm1 -; GENERIC-NEXT: cvtsi2ssq (%rsi), %xmm0 -; GENERIC-NEXT: addss %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: cvtsi2ssq %rdi, %xmm1 # sched: [5:2.00] +; GENERIC-NEXT: cvtsi2ssq (%rsi), %xmm0 # sched: [10:1.00] +; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvtsi2ssq: ; ATOM: # BB#0: @@ -559,10 +559,10 @@ define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) { define i32 @test_cvtss2si(float %a0, float *%a1) { ; GENERIC-LABEL: test_cvtss2si: ; GENERIC: # BB#0: -; GENERIC-NEXT: cvtss2si %xmm0, %ecx -; GENERIC-NEXT: cvtss2si (%rdi), %eax -; GENERIC-NEXT: addl %ecx, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: cvtss2si %xmm0, %ecx # sched: [5:1.00] +; GENERIC-NEXT: cvtss2si (%rdi), %eax # sched: [9:1.00] +; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvtss2si: ; ATOM: # BB#0: @@ -618,10 +618,10 @@ declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone define i64 @test_cvtss2siq(float %a0, float *%a1) { ; GENERIC-LABEL: test_cvtss2siq: ; GENERIC: # BB#0: -; GENERIC-NEXT: cvtss2si %xmm0, %rcx -; GENERIC-NEXT: cvtss2si (%rdi), %rax -; GENERIC-NEXT: addq %rcx, %rax -; GENERIC-NEXT: retq +; GENERIC-NEXT: cvtss2si %xmm0, %rcx # sched: [5:1.00] +; GENERIC-NEXT: cvtss2si (%rdi), %rax # sched: [9:1.00] +; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvtss2siq: ; ATOM: # BB#0: @@ -677,10 +677,10 @@ declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>) nounwind readnone define i32 @test_cvttss2si(float %a0, float *%a1) { ; GENERIC-LABEL: test_cvttss2si: ; GENERIC: # BB#0: -; GENERIC-NEXT: cvttss2si %xmm0, %ecx -; GENERIC-NEXT: cvttss2si (%rdi), %eax -; GENERIC-NEXT: addl %ecx, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: cvttss2si %xmm0, %ecx # sched: [5:1.00] +; GENERIC-NEXT: cvttss2si (%rdi), %eax # sched: [9:1.00] +; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvttss2si: ; ATOM: # BB#0: @@ -733,10 +733,10 @@ define i32 @test_cvttss2si(float %a0, float *%a1) { define i64 @test_cvttss2siq(float %a0, float *%a1) { ; GENERIC-LABEL: test_cvttss2siq: ; GENERIC: # BB#0: -; GENERIC-NEXT: cvttss2si %xmm0, %rcx -; GENERIC-NEXT: cvttss2si (%rdi), %rax -; GENERIC-NEXT: addq %rcx, %rax -; GENERIC-NEXT: retq +; GENERIC-NEXT: cvttss2si %xmm0, %rcx # sched: [5:1.00] +; GENERIC-NEXT: cvttss2si (%rdi), %rax # sched: [9:1.00] +; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvttss2siq: ; ATOM: # BB#0: @@ -789,9 +789,9 @@ define i64 @test_cvttss2siq(float %a0, float *%a1) { define <4 x float> @test_divps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_divps: ; GENERIC: # BB#0: -; GENERIC-NEXT: divps %xmm1, %xmm0 -; GENERIC-NEXT: divps (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: divps %xmm1, %xmm0 # sched: [14:1.00] +; GENERIC-NEXT: divps (%rdi), %xmm0 # sched: [20:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_divps: ; ATOM: # BB#0: @@ -837,9 +837,9 @@ define <4 x float> @test_divps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a define float @test_divss(float %a0, float %a1, float *%a2) { ; GENERIC-LABEL: test_divss: ; GENERIC: # BB#0: -; GENERIC-NEXT: divss %xmm1, %xmm0 -; GENERIC-NEXT: divss (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: divss %xmm1, %xmm0 # sched: [14:1.00] +; GENERIC-NEXT: divss (%rdi), %xmm0 # sched: [20:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_divss: ; ATOM: # BB#0: @@ -885,9 +885,9 @@ define float @test_divss(float %a0, float %a1, float *%a2) { define void @test_ldmxcsr(i32 %a0) { ; GENERIC-LABEL: test_ldmxcsr: ; GENERIC: # BB#0: -; GENERIC-NEXT: movl %edi, -{{[0-9]+}}(%rsp) -; GENERIC-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) -; GENERIC-NEXT: retq +; GENERIC-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; GENERIC-NEXT: ldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_ldmxcsr: ; ATOM: # BB#0: @@ -935,9 +935,9 @@ declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind readnone define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_maxps: ; GENERIC: # BB#0: -; GENERIC-NEXT: maxps %xmm1, %xmm0 -; GENERIC-NEXT: maxps (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: maxps %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: maxps (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_maxps: ; ATOM: # BB#0: @@ -984,9 +984,9 @@ declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind read define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_maxss: ; GENERIC: # BB#0: -; GENERIC-NEXT: maxss %xmm1, %xmm0 -; GENERIC-NEXT: maxss (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: maxss %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: maxss (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_maxss: ; ATOM: # BB#0: @@ -1033,9 +1033,9 @@ declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind read define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_minps: ; GENERIC: # BB#0: -; GENERIC-NEXT: minps %xmm1, %xmm0 -; GENERIC-NEXT: minps (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: minps %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: minps (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_minps: ; ATOM: # BB#0: @@ -1082,9 +1082,9 @@ declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind read define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_minss: ; GENERIC: # BB#0: -; GENERIC-NEXT: minss %xmm1, %xmm0 -; GENERIC-NEXT: minss (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: minss %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: minss (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_minss: ; ATOM: # BB#0: @@ -1131,10 +1131,10 @@ declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind read define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) { ; GENERIC-LABEL: test_movaps: ; GENERIC: # BB#0: -; GENERIC-NEXT: movaps (%rdi), %xmm0 -; GENERIC-NEXT: addps %xmm0, %xmm0 -; GENERIC-NEXT: movaps %xmm0, (%rsi) -; GENERIC-NEXT: retq +; GENERIC-NEXT: movaps (%rdi), %xmm0 # sched: [6:0.50] +; GENERIC-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: movaps %xmm0, (%rsi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movaps: ; ATOM: # BB#0: @@ -1188,8 +1188,8 @@ define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) { define <4 x float> @test_movhlps(<4 x float> %a0, <4 x float> %a1) { ; GENERIC-LABEL: test_movhlps: ; GENERIC: # BB#0: -; GENERIC-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] -; GENERIC-NEXT: retq +; GENERIC-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movhlps: ; ATOM: # BB#0: @@ -1235,11 +1235,11 @@ define <4 x float> @test_movhlps(<4 x float> %a0, <4 x float> %a1) { define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; GENERIC-LABEL: test_movhps: ; GENERIC: # BB#0: -; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] -; GENERIC-NEXT: addps %xmm0, %xmm1 -; GENERIC-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] -; GENERIC-NEXT: movlps %xmm1, (%rdi) -; GENERIC-NEXT: retq +; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; GENERIC-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] +; GENERIC-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00] +; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movhps: ; ATOM: # BB#0: @@ -1298,9 +1298,9 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) { ; GENERIC-LABEL: test_movlhps: ; GENERIC: # BB#0: -; GENERIC-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; GENERIC-NEXT: addps %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movlhps: ; ATOM: # BB#0: @@ -1345,10 +1345,10 @@ define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) { define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; GENERIC-LABEL: test_movlps: ; GENERIC: # BB#0: -; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] -; GENERIC-NEXT: addps %xmm0, %xmm1 -; GENERIC-NEXT: movlps %xmm1, (%rdi) -; GENERIC-NEXT: retq +; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] +; GENERIC-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00] +; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movlps: ; ATOM: # BB#0: @@ -1404,8 +1404,8 @@ define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { define i32 @test_movmskps(<4 x float> %a0) { ; GENERIC-LABEL: test_movmskps: ; GENERIC: # BB#0: -; GENERIC-NEXT: movmskps %xmm0, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: movmskps %xmm0, %eax # sched: [2:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movmskps: ; ATOM: # BB#0: @@ -1446,8 +1446,8 @@ declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone define void @test_movntps(<4 x float> %a0, <4 x float> *%a1) { ; GENERIC-LABEL: test_movntps: ; GENERIC: # BB#0: -; GENERIC-NEXT: movntps %xmm0, (%rdi) -; GENERIC-NEXT: retq +; GENERIC-NEXT: movntps %xmm0, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movntps: ; ATOM: # BB#0: @@ -1491,10 +1491,10 @@ define void @test_movntps(<4 x float> %a0, <4 x float> *%a1) { define void @test_movss_mem(float* %a0, float* %a1) { ; GENERIC-LABEL: test_movss_mem: ; GENERIC: # BB#0: -; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; GENERIC-NEXT: addss %xmm0, %xmm0 -; GENERIC-NEXT: movss %xmm0, (%rsi) -; GENERIC-NEXT: retq +; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] +; GENERIC-NEXT: addss %xmm0, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: movss %xmm0, (%rsi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movss_mem: ; ATOM: # BB#0: @@ -1546,8 +1546,8 @@ define void @test_movss_mem(float* %a0, float* %a1) { define <4 x float> @test_movss_reg(<4 x float> %a0, <4 x float> %a1) { ; GENERIC-LABEL: test_movss_reg: ; GENERIC: # BB#0: -; GENERIC-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] -; GENERIC-NEXT: retq +; GENERIC-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movss_reg: ; ATOM: # BB#0: @@ -1591,10 +1591,10 @@ define <4 x float> @test_movss_reg(<4 x float> %a0, <4 x float> %a1) { define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) { ; GENERIC-LABEL: test_movups: ; GENERIC: # BB#0: -; GENERIC-NEXT: movups (%rdi), %xmm0 -; GENERIC-NEXT: addps %xmm0, %xmm0 -; GENERIC-NEXT: movups %xmm0, (%rsi) -; GENERIC-NEXT: retq +; GENERIC-NEXT: movups (%rdi), %xmm0 # sched: [6:0.50] +; GENERIC-NEXT: addps %xmm0, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: movups %xmm0, (%rsi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movups: ; ATOM: # BB#0: @@ -1646,9 +1646,9 @@ define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) { define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_mulps: ; GENERIC: # BB#0: -; GENERIC-NEXT: mulps %xmm1, %xmm0 -; GENERIC-NEXT: mulps (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: mulps %xmm1, %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: mulps (%rdi), %xmm0 # sched: [11:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_mulps: ; ATOM: # BB#0: @@ -1694,9 +1694,9 @@ define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a define float @test_mulss(float %a0, float %a1, float *%a2) { ; GENERIC-LABEL: test_mulss: ; GENERIC: # BB#0: -; GENERIC-NEXT: mulss %xmm1, %xmm0 -; GENERIC-NEXT: mulss (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: mulss %xmm1, %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: mulss (%rdi), %xmm0 # sched: [11:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_mulss: ; ATOM: # BB#0: @@ -1742,9 +1742,9 @@ define float @test_mulss(float %a0, float %a1, float *%a2) { define <4 x float> @test_orps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_orps: ; GENERIC: # BB#0: -; GENERIC-NEXT: orps %xmm1, %xmm0 -; GENERIC-NEXT: orps (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: orps %xmm1, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: orps (%rdi), %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_orps: ; ATOM: # BB#0: @@ -1802,8 +1802,8 @@ define <4 x float> @test_orps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2 define void @test_prefetchnta(i8* %a0) { ; GENERIC-LABEL: test_prefetchnta: ; GENERIC: # BB#0: -; GENERIC-NEXT: prefetchnta (%rdi) -; GENERIC-NEXT: retq +; GENERIC-NEXT: prefetchnta (%rdi) # sched: [5:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_prefetchnta: ; ATOM: # BB#0: @@ -1848,10 +1848,10 @@ declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) nounwind readnone define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) { ; GENERIC-LABEL: test_rcpps: ; GENERIC: # BB#0: -; GENERIC-NEXT: rcpps %xmm0, %xmm1 -; GENERIC-NEXT: rcpps (%rdi), %xmm0 -; GENERIC-NEXT: addps %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: rcpps %xmm0, %xmm1 # sched: [5:1.00] +; GENERIC-NEXT: rcpps (%rdi), %xmm0 # sched: [11:1.00] +; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_rcpps: ; ATOM: # BB#0: @@ -1909,11 +1909,11 @@ declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone define <4 x float> @test_rcpss(float %a0, float *%a1) { ; GENERIC-LABEL: test_rcpss: ; GENERIC: # BB#0: -; GENERIC-NEXT: rcpss %xmm0, %xmm0 -; GENERIC-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; GENERIC-NEXT: rcpss %xmm1, %xmm1 -; GENERIC-NEXT: addps %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: rcpss %xmm0, %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] +; GENERIC-NEXT: rcpss %xmm1, %xmm1 # sched: [5:1.00] +; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_rcpss: ; ATOM: # BB#0: @@ -1975,10 +1975,10 @@ declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) { ; GENERIC-LABEL: test_rsqrtps: ; GENERIC: # BB#0: -; GENERIC-NEXT: rsqrtps %xmm0, %xmm1 -; GENERIC-NEXT: rsqrtps (%rdi), %xmm0 -; GENERIC-NEXT: addps %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: rsqrtps %xmm0, %xmm1 # sched: [5:1.00] +; GENERIC-NEXT: rsqrtps (%rdi), %xmm0 # sched: [11:1.00] +; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_rsqrtps: ; ATOM: # BB#0: @@ -2036,11 +2036,11 @@ declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone define <4 x float> @test_rsqrtss(float %a0, float *%a1) { ; GENERIC-LABEL: test_rsqrtss: ; GENERIC: # BB#0: -; GENERIC-NEXT: rsqrtss %xmm0, %xmm0 -; GENERIC-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; GENERIC-NEXT: rsqrtss %xmm1, %xmm1 -; GENERIC-NEXT: addps %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: rsqrtss %xmm0, %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] +; GENERIC-NEXT: rsqrtss %xmm1, %xmm1 # sched: [5:1.00] +; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_rsqrtss: ; ATOM: # BB#0: @@ -2102,8 +2102,8 @@ declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone define void @test_sfence() { ; GENERIC-LABEL: test_sfence: ; GENERIC: # BB#0: -; GENERIC-NEXT: sfence -; GENERIC-NEXT: retq +; GENERIC-NEXT: sfence # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_sfence: ; ATOM: # BB#0: @@ -2148,9 +2148,9 @@ declare void @llvm.x86.sse.sfence() nounwind readnone define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) nounwind { ; GENERIC-LABEL: test_shufps: ; GENERIC: # BB#0: -; GENERIC-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] -; GENERIC-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] -; GENERIC-NEXT: retq +; GENERIC-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] +; GENERIC-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_shufps: ; ATOM: # BB#0: @@ -2200,10 +2200,10 @@ define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) { ; GENERIC-LABEL: test_sqrtps: ; GENERIC: # BB#0: -; GENERIC-NEXT: sqrtps %xmm0, %xmm1 -; GENERIC-NEXT: sqrtps (%rdi), %xmm0 -; GENERIC-NEXT: addps %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: sqrtps %xmm0, %xmm1 # sched: [14:1.00] +; GENERIC-NEXT: sqrtps (%rdi), %xmm0 # sched: [20:1.00] +; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_sqrtps: ; ATOM: # BB#0: @@ -2260,11 +2260,11 @@ declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) { ; GENERIC-LABEL: test_sqrtss: ; GENERIC: # BB#0: -; GENERIC-NEXT: sqrtss %xmm0, %xmm0 -; GENERIC-NEXT: movaps (%rdi), %xmm1 -; GENERIC-NEXT: sqrtss %xmm1, %xmm1 -; GENERIC-NEXT: addps %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: sqrtss %xmm0, %xmm0 # sched: [14:1.00] +; GENERIC-NEXT: movaps (%rdi), %xmm1 # sched: [6:0.50] +; GENERIC-NEXT: sqrtss %xmm1, %xmm1 # sched: [14:1.00] +; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_sqrtss: ; ATOM: # BB#0: @@ -2324,9 +2324,9 @@ declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone define i32 @test_stmxcsr() { ; GENERIC-LABEL: test_stmxcsr: ; GENERIC: # BB#0: -; GENERIC-NEXT: stmxcsr -{{[0-9]+}}(%rsp) -; GENERIC-NEXT: movl -{{[0-9]+}}(%rsp), %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: stmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] +; GENERIC-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_stmxcsr: ; ATOM: # BB#0: @@ -2374,9 +2374,9 @@ declare void @llvm.x86.sse.stmxcsr(i8*) nounwind readnone define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_subps: ; GENERIC: # BB#0: -; GENERIC-NEXT: subps %xmm1, %xmm0 -; GENERIC-NEXT: subps (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: subps %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: subps (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_subps: ; ATOM: # BB#0: @@ -2422,9 +2422,9 @@ define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a define float @test_subss(float %a0, float %a1, float *%a2) { ; GENERIC-LABEL: test_subss: ; GENERIC: # BB#0: -; GENERIC-NEXT: subss %xmm1, %xmm0 -; GENERIC-NEXT: subss (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: subss %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: subss (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_subss: ; ATOM: # BB#0: @@ -2470,17 +2470,17 @@ define float @test_subss(float %a0, float %a1, float *%a2) { define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_ucomiss: ; GENERIC: # BB#0: -; GENERIC-NEXT: ucomiss %xmm1, %xmm0 -; GENERIC-NEXT: setnp %al -; GENERIC-NEXT: sete %cl -; GENERIC-NEXT: andb %al, %cl -; GENERIC-NEXT: ucomiss (%rdi), %xmm0 -; GENERIC-NEXT: setnp %al -; GENERIC-NEXT: sete %dl -; GENERIC-NEXT: andb %al, %dl -; GENERIC-NEXT: orb %cl, %dl -; GENERIC-NEXT: movzbl %dl, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: ucomiss %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: setnp %al # sched: [1:1.00] +; GENERIC-NEXT: sete %cl # sched: [1:1.00] +; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33] +; GENERIC-NEXT: ucomiss (%rdi), %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: setnp %al # sched: [1:1.00] +; GENERIC-NEXT: sete %dl # sched: [1:1.00] +; GENERIC-NEXT: andb %al, %dl # sched: [1:0.33] +; GENERIC-NEXT: orb %cl, %dl # sched: [1:0.33] +; GENERIC-NEXT: movzbl %dl, %eax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_ucomiss: ; ATOM: # BB#0: @@ -2576,9 +2576,9 @@ declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_unpckhps: ; GENERIC: # BB#0: -; GENERIC-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; GENERIC-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] -; GENERIC-NEXT: retq +; GENERIC-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] +; GENERIC-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_unpckhps: ; ATOM: # BB#0: @@ -2628,9 +2628,9 @@ define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_unpcklps: ; GENERIC: # BB#0: -; GENERIC-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; GENERIC-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] -; GENERIC-NEXT: retq +; GENERIC-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] +; GENERIC-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_unpcklps: ; ATOM: # BB#0: @@ -2680,9 +2680,9 @@ define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> define <4 x float> @test_xorps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_xorps: ; GENERIC: # BB#0: -; GENERIC-NEXT: xorps %xmm1, %xmm0 -; GENERIC-NEXT: xorps (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: xorps %xmm1, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: xorps (%rdi), %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_xorps: ; ATOM: # BB#0: diff --git a/test/CodeGen/X86/sse2-schedule.ll b/test/CodeGen/X86/sse2-schedule.ll index af55e576137..fbc21390d2d 100644 --- a/test/CodeGen/X86/sse2-schedule.ll +++ b/test/CodeGen/X86/sse2-schedule.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY @@ -12,9 +12,9 @@ define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_addpd: ; GENERIC: # BB#0: -; GENERIC-NEXT: addpd %xmm1, %xmm0 -; GENERIC-NEXT: addpd (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: addpd (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_addpd: ; ATOM: # BB#0: @@ -60,9 +60,9 @@ define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double> define double @test_addsd(double %a0, double %a1, double *%a2) { ; GENERIC-LABEL: test_addsd: ; GENERIC: # BB#0: -; GENERIC-NEXT: addsd %xmm1, %xmm0 -; GENERIC-NEXT: addsd (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: addsd (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_addsd: ; ATOM: # BB#0: @@ -108,10 +108,10 @@ define double @test_addsd(double %a0, double %a1, double *%a2) { define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_andpd: ; GENERIC: # BB#0: -; GENERIC-NEXT: andpd %xmm1, %xmm0 -; GENERIC-NEXT: andpd (%rdi), %xmm0 -; GENERIC-NEXT: addpd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: andpd %xmm1, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: andpd (%rdi), %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_andpd: ; ATOM: # BB#0: @@ -168,10 +168,10 @@ define <2 x double> @test_andpd(<2 x double> %a0, <2 x double> %a1, <2 x double> define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_andnotpd: ; GENERIC: # BB#0: -; GENERIC-NEXT: andnpd %xmm1, %xmm0 -; GENERIC-NEXT: andnpd (%rdi), %xmm0 -; GENERIC-NEXT: addpd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: andnpd %xmm1, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: andnpd (%rdi), %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_andnotpd: ; ATOM: # BB#0: @@ -230,10 +230,10 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_cmppd: ; GENERIC: # BB#0: -; GENERIC-NEXT: cmpeqpd %xmm0, %xmm1 -; GENERIC-NEXT: cmpeqpd (%rdi), %xmm0 -; GENERIC-NEXT: orpd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: cmpeqpd %xmm0, %xmm1 # sched: [3:1.00] +; GENERIC-NEXT: cmpeqpd (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cmppd: ; ATOM: # BB#0: @@ -288,9 +288,9 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> define double @test_cmpsd(double %a0, double %a1, double *%a2) { ; GENERIC-LABEL: test_cmpsd: ; GENERIC: # BB#0: -; GENERIC-NEXT: cmpeqsd %xmm1, %xmm0 -; GENERIC-NEXT: cmpeqsd (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: cmpeqsd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: cmpeqsd (%rdi), %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cmpsd: ; ATOM: # BB#0: @@ -341,17 +341,17 @@ declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounw define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_comisd: ; GENERIC: # BB#0: -; GENERIC-NEXT: comisd %xmm1, %xmm0 -; GENERIC-NEXT: setnp %al -; GENERIC-NEXT: sete %cl -; GENERIC-NEXT: andb %al, %cl -; GENERIC-NEXT: comisd (%rdi), %xmm0 -; GENERIC-NEXT: setnp %al -; GENERIC-NEXT: sete %dl -; GENERIC-NEXT: andb %al, %dl -; GENERIC-NEXT: orb %cl, %dl -; GENERIC-NEXT: movzbl %dl, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: comisd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: setnp %al # sched: [1:1.00] +; GENERIC-NEXT: sete %cl # sched: [1:1.00] +; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33] +; GENERIC-NEXT: comisd (%rdi), %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: setnp %al # sched: [1:1.00] +; GENERIC-NEXT: sete %dl # sched: [1:1.00] +; GENERIC-NEXT: andb %al, %dl # sched: [1:0.33] +; GENERIC-NEXT: orb %cl, %dl # sched: [1:0.33] +; GENERIC-NEXT: movzbl %dl, %eax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_comisd: ; ATOM: # BB#0: @@ -447,10 +447,10 @@ declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readno define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; GENERIC-LABEL: test_cvtdq2pd: ; GENERIC: # BB#0: -; GENERIC-NEXT: cvtdq2pd %xmm0, %xmm1 -; GENERIC-NEXT: cvtdq2pd (%rdi), %xmm0 -; GENERIC-NEXT: addpd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: cvtdq2pd %xmm0, %xmm1 # sched: [4:1.00] +; GENERIC-NEXT: cvtdq2pd (%rdi), %xmm0 # sched: [10:1.00] +; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvtdq2pd: ; ATOM: # BB#0: @@ -505,10 +505,10 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { ; GENERIC-LABEL: test_cvtdq2ps: ; GENERIC: # BB#0: -; GENERIC-NEXT: cvtdq2ps %xmm0, %xmm1 -; GENERIC-NEXT: cvtdq2ps (%rdi), %xmm0 -; GENERIC-NEXT: addps %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: cvtdq2ps %xmm0, %xmm1 # sched: [3:1.00] +; GENERIC-NEXT: cvtdq2ps (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvtdq2ps: ; ATOM: # BB#0: @@ -562,10 +562,10 @@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; GENERIC-LABEL: test_cvtpd2dq: ; GENERIC: # BB#0: -; GENERIC-NEXT: cvtpd2dq %xmm0, %xmm1 -; GENERIC-NEXT: cvtpd2dq (%rdi), %xmm0 -; GENERIC-NEXT: paddd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: cvtpd2dq %xmm0, %xmm1 # sched: [4:1.00] +; GENERIC-NEXT: cvtpd2dq (%rdi), %xmm0 # sched: [10:1.00] +; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvtpd2dq: ; ATOM: # BB#0: @@ -620,10 +620,10 @@ declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) { ; GENERIC-LABEL: test_cvtpd2ps: ; GENERIC: # BB#0: -; GENERIC-NEXT: cvtpd2ps %xmm0, %xmm1 -; GENERIC-NEXT: cvtpd2ps (%rdi), %xmm0 -; GENERIC-NEXT: addps %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: cvtpd2ps %xmm0, %xmm1 # sched: [4:1.00] +; GENERIC-NEXT: cvtpd2ps (%rdi), %xmm0 # sched: [10:1.00] +; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvtpd2ps: ; ATOM: # BB#0: @@ -678,10 +678,10 @@ declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) { ; GENERIC-LABEL: test_cvtps2dq: ; GENERIC: # BB#0: -; GENERIC-NEXT: cvtps2dq %xmm0, %xmm1 -; GENERIC-NEXT: cvtps2dq (%rdi), %xmm0 -; GENERIC-NEXT: paddd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: cvtps2dq %xmm0, %xmm1 # sched: [3:1.00] +; GENERIC-NEXT: cvtps2dq (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvtps2dq: ; ATOM: # BB#0: @@ -736,10 +736,10 @@ declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { ; GENERIC-LABEL: test_cvtps2pd: ; GENERIC: # BB#0: -; GENERIC-NEXT: cvtps2pd %xmm0, %xmm1 -; GENERIC-NEXT: cvtps2pd (%rdi), %xmm0 -; GENERIC-NEXT: addpd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: cvtps2pd %xmm0, %xmm1 # sched: [2:1.00] +; GENERIC-NEXT: cvtps2pd (%rdi), %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvtps2pd: ; ATOM: # BB#0: @@ -795,10 +795,10 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { define i32 @test_cvtsd2si(double %a0, double *%a1) { ; GENERIC-LABEL: test_cvtsd2si: ; GENERIC: # BB#0: -; GENERIC-NEXT: cvtsd2si %xmm0, %ecx -; GENERIC-NEXT: cvtsd2si (%rdi), %eax -; GENERIC-NEXT: addl %ecx, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: cvtsd2si %xmm0, %ecx # sched: [5:1.00] +; GENERIC-NEXT: cvtsd2si (%rdi), %eax # sched: [9:1.00] +; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvtsd2si: ; ATOM: # BB#0: @@ -854,10 +854,10 @@ declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone define i64 @test_cvtsd2siq(double %a0, double *%a1) { ; GENERIC-LABEL: test_cvtsd2siq: ; GENERIC: # BB#0: -; GENERIC-NEXT: cvtsd2si %xmm0, %rcx -; GENERIC-NEXT: cvtsd2si (%rdi), %rax -; GENERIC-NEXT: addq %rcx, %rax -; GENERIC-NEXT: retq +; GENERIC-NEXT: cvtsd2si %xmm0, %rcx # sched: [5:1.00] +; GENERIC-NEXT: cvtsd2si (%rdi), %rax # sched: [9:1.00] +; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvtsd2siq: ; ATOM: # BB#0: @@ -913,11 +913,11 @@ declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>) nounwind readnone define float @test_cvtsd2ss(double %a0, double *%a1) { ; GENERIC-LABEL: test_cvtsd2ss: ; GENERIC: # BB#0: -; GENERIC-NEXT: cvtsd2ss %xmm0, %xmm1 -; GENERIC-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; GENERIC-NEXT: cvtsd2ss %xmm0, %xmm0 -; GENERIC-NEXT: addss %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: cvtsd2ss %xmm0, %xmm1 # sched: [4:1.00] +; GENERIC-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [4:0.50] +; GENERIC-NEXT: cvtsd2ss %xmm0, %xmm0 # sched: [4:1.00] +; GENERIC-NEXT: addss %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvtsd2ss: ; ATOM: # BB#0: @@ -977,10 +977,10 @@ define float @test_cvtsd2ss(double %a0, double *%a1) { define double @test_cvtsi2sd(i32 %a0, i32 *%a1) { ; GENERIC-LABEL: test_cvtsi2sd: ; GENERIC: # BB#0: -; GENERIC-NEXT: cvtsi2sdl %edi, %xmm1 -; GENERIC-NEXT: cvtsi2sdl (%rsi), %xmm0 -; GENERIC-NEXT: addsd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: cvtsi2sdl %edi, %xmm1 # sched: [4:1.00] +; GENERIC-NEXT: cvtsi2sdl (%rsi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvtsi2sd: ; ATOM: # BB#0: @@ -1033,10 +1033,10 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) { define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { ; GENERIC-LABEL: test_cvtsi2sdq: ; GENERIC: # BB#0: -; GENERIC-NEXT: cvtsi2sdq %rdi, %xmm1 -; GENERIC-NEXT: cvtsi2sdq (%rsi), %xmm0 -; GENERIC-NEXT: addsd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: cvtsi2sdq %rdi, %xmm1 # sched: [4:1.00] +; GENERIC-NEXT: cvtsi2sdq (%rsi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvtsi2sdq: ; ATOM: # BB#0: @@ -1091,11 +1091,11 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { define double @test_cvtss2sd(float %a0, float *%a1) { ; GENERIC-LABEL: test_cvtss2sd: ; GENERIC: # BB#0: -; GENERIC-NEXT: cvtss2sd %xmm0, %xmm1 -; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; GENERIC-NEXT: cvtss2sd %xmm0, %xmm0 -; GENERIC-NEXT: addsd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: cvtss2sd %xmm0, %xmm1 # sched: [1:1.00] +; GENERIC-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] +; GENERIC-NEXT: cvtss2sd %xmm0, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: addsd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvtss2sd: ; ATOM: # BB#0: @@ -1155,10 +1155,10 @@ define double @test_cvtss2sd(float %a0, float *%a1) { define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; GENERIC-LABEL: test_cvttpd2dq: ; GENERIC: # BB#0: -; GENERIC-NEXT: cvttpd2dq %xmm0, %xmm1 -; GENERIC-NEXT: cvttpd2dq (%rdi), %xmm0 -; GENERIC-NEXT: paddd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: cvttpd2dq %xmm0, %xmm1 # sched: [4:1.00] +; GENERIC-NEXT: cvttpd2dq (%rdi), %xmm0 # sched: [10:1.00] +; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvttpd2dq: ; ATOM: # BB#0: @@ -1214,10 +1214,10 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) { define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { ; GENERIC-LABEL: test_cvttps2dq: ; GENERIC: # BB#0: -; GENERIC-NEXT: cvttps2dq %xmm0, %xmm1 -; GENERIC-NEXT: cvttps2dq (%rdi), %xmm0 -; GENERIC-NEXT: paddd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: cvttps2dq %xmm0, %xmm1 # sched: [3:1.00] +; GENERIC-NEXT: cvttps2dq (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvttps2dq: ; ATOM: # BB#0: @@ -1271,10 +1271,10 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { define i32 @test_cvttsd2si(double %a0, double *%a1) { ; GENERIC-LABEL: test_cvttsd2si: ; GENERIC: # BB#0: -; GENERIC-NEXT: cvttsd2si %xmm0, %ecx -; GENERIC-NEXT: cvttsd2si (%rdi), %eax -; GENERIC-NEXT: addl %ecx, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: cvttsd2si %xmm0, %ecx # sched: [5:1.00] +; GENERIC-NEXT: cvttsd2si (%rdi), %eax # sched: [9:1.00] +; GENERIC-NEXT: addl %ecx, %eax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvttsd2si: ; ATOM: # BB#0: @@ -1327,10 +1327,10 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) { define i64 @test_cvttsd2siq(double %a0, double *%a1) { ; GENERIC-LABEL: test_cvttsd2siq: ; GENERIC: # BB#0: -; GENERIC-NEXT: cvttsd2si %xmm0, %rcx -; GENERIC-NEXT: cvttsd2si (%rdi), %rax -; GENERIC-NEXT: addq %rcx, %rax -; GENERIC-NEXT: retq +; GENERIC-NEXT: cvttsd2si %xmm0, %rcx # sched: [5:1.00] +; GENERIC-NEXT: cvttsd2si (%rdi), %rax # sched: [9:1.00] +; GENERIC-NEXT: addq %rcx, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvttsd2siq: ; ATOM: # BB#0: @@ -1383,9 +1383,9 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) { define <2 x double> @test_divpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_divpd: ; GENERIC: # BB#0: -; GENERIC-NEXT: divpd %xmm1, %xmm0 -; GENERIC-NEXT: divpd (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: divpd %xmm1, %xmm0 # sched: [22:1.00] +; GENERIC-NEXT: divpd (%rdi), %xmm0 # sched: [28:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_divpd: ; ATOM: # BB#0: @@ -1431,9 +1431,9 @@ define <2 x double> @test_divpd(<2 x double> %a0, <2 x double> %a1, <2 x double> define double @test_divsd(double %a0, double %a1, double *%a2) { ; GENERIC-LABEL: test_divsd: ; GENERIC: # BB#0: -; GENERIC-NEXT: divsd %xmm1, %xmm0 -; GENERIC-NEXT: divsd (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: divsd %xmm1, %xmm0 # sched: [22:1.00] +; GENERIC-NEXT: divsd (%rdi), %xmm0 # sched: [28:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_divsd: ; ATOM: # BB#0: @@ -1479,8 +1479,8 @@ define double @test_divsd(double %a0, double %a1, double *%a2) { define void @test_lfence() { ; GENERIC-LABEL: test_lfence: ; GENERIC: # BB#0: -; GENERIC-NEXT: lfence -; GENERIC-NEXT: retq +; GENERIC-NEXT: lfence # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_lfence: ; ATOM: # BB#0: @@ -1525,8 +1525,8 @@ declare void @llvm.x86.sse2.lfence() nounwind readnone define void @test_mfence() { ; GENERIC-LABEL: test_mfence: ; GENERIC: # BB#0: -; GENERIC-NEXT: mfence -; GENERIC-NEXT: retq +; GENERIC-NEXT: mfence # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_mfence: ; ATOM: # BB#0: @@ -1571,8 +1571,8 @@ declare void @llvm.x86.sse2.mfence() nounwind readnone define void @test_maskmovdqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) { ; GENERIC-LABEL: test_maskmovdqu: ; GENERIC: # BB#0: -; GENERIC-NEXT: maskmovdqu %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: maskmovdqu %xmm1, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_maskmovdqu: ; ATOM: # BB#0: @@ -1615,9 +1615,9 @@ declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind define <2 x double> @test_maxpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_maxpd: ; GENERIC: # BB#0: -; GENERIC-NEXT: maxpd %xmm1, %xmm0 -; GENERIC-NEXT: maxpd (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: maxpd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: maxpd (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_maxpd: ; ATOM: # BB#0: @@ -1664,9 +1664,9 @@ declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind define <2 x double> @test_maxsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_maxsd: ; GENERIC: # BB#0: -; GENERIC-NEXT: maxsd %xmm1, %xmm0 -; GENERIC-NEXT: maxsd (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: maxsd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: maxsd (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_maxsd: ; ATOM: # BB#0: @@ -1713,9 +1713,9 @@ declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind define <2 x double> @test_minpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_minpd: ; GENERIC: # BB#0: -; GENERIC-NEXT: minpd %xmm1, %xmm0 -; GENERIC-NEXT: minpd (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: minpd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: minpd (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_minpd: ; ATOM: # BB#0: @@ -1762,9 +1762,9 @@ declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind define <2 x double> @test_minsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_minsd: ; GENERIC: # BB#0: -; GENERIC-NEXT: minsd %xmm1, %xmm0 -; GENERIC-NEXT: minsd (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: minsd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: minsd (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_minsd: ; ATOM: # BB#0: @@ -1811,10 +1811,10 @@ declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { ; GENERIC-LABEL: test_movapd: ; GENERIC: # BB#0: -; GENERIC-NEXT: movapd (%rdi), %xmm0 -; GENERIC-NEXT: addpd %xmm0, %xmm0 -; GENERIC-NEXT: movapd %xmm0, (%rsi) -; GENERIC-NEXT: retq +; GENERIC-NEXT: movapd (%rdi), %xmm0 # sched: [6:0.50] +; GENERIC-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: movapd %xmm0, (%rsi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movapd: ; ATOM: # BB#0: @@ -1866,10 +1866,10 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) { ; GENERIC-LABEL: test_movdqa: ; GENERIC: # BB#0: -; GENERIC-NEXT: movdqa (%rdi), %xmm0 -; GENERIC-NEXT: paddq %xmm0, %xmm0 -; GENERIC-NEXT: movdqa %xmm0, (%rsi) -; GENERIC-NEXT: retq +; GENERIC-NEXT: movdqa (%rdi), %xmm0 # sched: [6:0.50] +; GENERIC-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: movdqa %xmm0, (%rsi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movdqa: ; ATOM: # BB#0: @@ -1921,10 +1921,10 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) { define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) { ; GENERIC-LABEL: test_movdqu: ; GENERIC: # BB#0: -; GENERIC-NEXT: movdqu (%rdi), %xmm0 -; GENERIC-NEXT: paddq %xmm0, %xmm0 -; GENERIC-NEXT: movdqu %xmm0, (%rsi) -; GENERIC-NEXT: retq +; GENERIC-NEXT: movdqu (%rdi), %xmm0 # sched: [6:0.50] +; GENERIC-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: movdqu %xmm0, (%rsi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movdqu: ; ATOM: # BB#0: @@ -1976,13 +1976,13 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) { define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; GENERIC-LABEL: test_movd: ; GENERIC: # BB#0: -; GENERIC-NEXT: movd %edi, %xmm1 -; GENERIC-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero -; GENERIC-NEXT: paddd %xmm0, %xmm1 -; GENERIC-NEXT: paddd %xmm0, %xmm2 -; GENERIC-NEXT: movd %xmm2, %eax -; GENERIC-NEXT: movd %xmm1, (%rsi) -; GENERIC-NEXT: retq +; GENERIC-NEXT: movd %edi, %xmm1 # sched: [1:1.00] +; GENERIC-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] +; GENERIC-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] +; GENERIC-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] +; GENERIC-NEXT: movd %xmm2, %eax # sched: [2:1.00] +; GENERIC-NEXT: movd %xmm1, (%rsi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movd: ; ATOM: # BB#0: @@ -2057,13 +2057,13 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) { define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) { ; GENERIC-LABEL: test_movd_64: ; GENERIC: # BB#0: -; GENERIC-NEXT: movq %rdi, %xmm1 -; GENERIC-NEXT: movq {{.*#+}} xmm2 = mem[0],zero -; GENERIC-NEXT: paddq %xmm0, %xmm1 -; GENERIC-NEXT: paddq %xmm0, %xmm2 -; GENERIC-NEXT: movq %xmm2, %rax -; GENERIC-NEXT: movq %xmm1, (%rsi) -; GENERIC-NEXT: retq +; GENERIC-NEXT: movq %rdi, %xmm1 # sched: [1:1.00] +; GENERIC-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [4:0.50] +; GENERIC-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] +; GENERIC-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] +; GENERIC-NEXT: movq %xmm2, %rax # sched: [2:1.00] +; GENERIC-NEXT: movq %xmm1, (%rsi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movd_64: ; ATOM: # BB#0: @@ -2138,10 +2138,10 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) { define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; GENERIC-LABEL: test_movhpd: ; GENERIC: # BB#0: -; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] -; GENERIC-NEXT: addpd %xmm0, %xmm1 -; GENERIC-NEXT: movhpd %xmm1, (%rdi) -; GENERIC-NEXT: retq +; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; GENERIC-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; GENERIC-NEXT: movhpd %xmm1, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movhpd: ; ATOM: # BB#0: @@ -2196,10 +2196,10 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; GENERIC-LABEL: test_movlpd: ; GENERIC: # BB#0: -; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] -; GENERIC-NEXT: addpd %xmm0, %xmm1 -; GENERIC-NEXT: movlpd %xmm1, (%rdi) -; GENERIC-NEXT: retq +; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] +; GENERIC-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; GENERIC-NEXT: movlpd %xmm1, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movlpd: ; ATOM: # BB#0: @@ -2254,8 +2254,8 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { define i32 @test_movmskpd(<2 x double> %a0) { ; GENERIC-LABEL: test_movmskpd: ; GENERIC: # BB#0: -; GENERIC-NEXT: movmskpd %xmm0, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: movmskpd %xmm0, %eax # sched: [2:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movmskpd: ; ATOM: # BB#0: @@ -2296,9 +2296,9 @@ declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone define void @test_movntdqa(<2 x i64> %a0, <2 x i64> *%a1) { ; GENERIC-LABEL: test_movntdqa: ; GENERIC: # BB#0: -; GENERIC-NEXT: paddq %xmm0, %xmm0 -; GENERIC-NEXT: movntdq %xmm0, (%rdi) -; GENERIC-NEXT: retq +; GENERIC-NEXT: paddq %xmm0, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: movntdq %xmm0, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movntdqa: ; ATOM: # BB#0: @@ -2345,9 +2345,9 @@ define void @test_movntdqa(<2 x i64> %a0, <2 x i64> *%a1) { define void @test_movntpd(<2 x double> %a0, <2 x double> *%a1) { ; GENERIC-LABEL: test_movntpd: ; GENERIC: # BB#0: -; GENERIC-NEXT: addpd %xmm0, %xmm0 -; GENERIC-NEXT: movntpd %xmm0, (%rdi) -; GENERIC-NEXT: retq +; GENERIC-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: movntpd %xmm0, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movntpd: ; ATOM: # BB#0: @@ -2392,10 +2392,10 @@ define void @test_movntpd(<2 x double> %a0, <2 x double> *%a1) { define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) { ; GENERIC-LABEL: test_movq_mem: ; GENERIC: # BB#0: -; GENERIC-NEXT: movq {{.*#+}} xmm1 = mem[0],zero -; GENERIC-NEXT: paddq %xmm1, %xmm0 -; GENERIC-NEXT: movq %xmm0, (%rdi) -; GENERIC-NEXT: retq +; GENERIC-NEXT: movq {{.*#+}} xmm1 = mem[0],zero sched: [4:0.50] +; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: movq %xmm0, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movq_mem: ; ATOM: # BB#0: @@ -2449,9 +2449,9 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) { define <2 x i64> @test_movq_reg(<2 x i64> %a0, <2 x i64> %a1) { ; GENERIC-LABEL: test_movq_reg: ; GENERIC: # BB#0: -; GENERIC-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero -; GENERIC-NEXT: paddq %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:1.00] +; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movq_reg: ; ATOM: # BB#0: @@ -2498,10 +2498,10 @@ define <2 x i64> @test_movq_reg(<2 x i64> %a0, <2 x i64> %a1) { define void @test_movsd_mem(double* %a0, double* %a1) { ; GENERIC-LABEL: test_movsd_mem: ; GENERIC: # BB#0: -; GENERIC-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; GENERIC-NEXT: addsd %xmm0, %xmm0 -; GENERIC-NEXT: movsd %xmm0, (%rsi) -; GENERIC-NEXT: retq +; GENERIC-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero sched: [4:0.50] +; GENERIC-NEXT: addsd %xmm0, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: movsd %xmm0, (%rsi) # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movsd_mem: ; ATOM: # BB#0: @@ -2553,9 +2553,9 @@ define void @test_movsd_mem(double* %a0, double* %a1) { define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) { ; GENERIC-LABEL: test_movsd_reg: ; GENERIC: # BB#0: -; GENERIC-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; GENERIC-NEXT: movapd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0] sched: [1:1.00] +; GENERIC-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movsd_reg: ; ATOM: # BB#0: @@ -2599,10 +2599,10 @@ define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) { define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { ; GENERIC-LABEL: test_movupd: ; GENERIC: # BB#0: -; GENERIC-NEXT: movupd (%rdi), %xmm0 -; GENERIC-NEXT: addpd %xmm0, %xmm0 -; GENERIC-NEXT: movupd %xmm0, (%rsi) -; GENERIC-NEXT: retq +; GENERIC-NEXT: movupd (%rdi), %xmm0 # sched: [6:0.50] +; GENERIC-NEXT: addpd %xmm0, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: movupd %xmm0, (%rsi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movupd: ; ATOM: # BB#0: @@ -2654,9 +2654,9 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { define <2 x double> @test_mulpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_mulpd: ; GENERIC: # BB#0: -; GENERIC-NEXT: mulpd %xmm1, %xmm0 -; GENERIC-NEXT: mulpd (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: mulpd %xmm1, %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: mulpd (%rdi), %xmm0 # sched: [11:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_mulpd: ; ATOM: # BB#0: @@ -2702,9 +2702,9 @@ define <2 x double> @test_mulpd(<2 x double> %a0, <2 x double> %a1, <2 x double> define double @test_mulsd(double %a0, double %a1, double *%a2) { ; GENERIC-LABEL: test_mulsd: ; GENERIC: # BB#0: -; GENERIC-NEXT: mulsd %xmm1, %xmm0 -; GENERIC-NEXT: mulsd (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: mulsd %xmm1, %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: mulsd (%rdi), %xmm0 # sched: [11:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_mulsd: ; ATOM: # BB#0: @@ -2750,10 +2750,10 @@ define double @test_mulsd(double %a0, double %a1, double *%a2) { define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_orpd: ; GENERIC: # BB#0: -; GENERIC-NEXT: orpd %xmm1, %xmm0 -; GENERIC-NEXT: orpd (%rdi), %xmm0 -; GENERIC-NEXT: addpd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: orpd %xmm1, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: orpd (%rdi), %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_orpd: ; ATOM: # BB#0: @@ -2810,9 +2810,9 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> define <8 x i16> @test_packssdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_packssdw: ; GENERIC: # BB#0: -; GENERIC-NEXT: packssdw %xmm1, %xmm0 -; GENERIC-NEXT: packssdw (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: packssdw %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: packssdw (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_packssdw: ; ATOM: # BB#0: @@ -2868,9 +2868,9 @@ declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind rea define <16 x i8> @test_packsswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_packsswb: ; GENERIC: # BB#0: -; GENERIC-NEXT: packsswb %xmm1, %xmm0 -; GENERIC-NEXT: packsswb (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: packsswb %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: packsswb (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_packsswb: ; ATOM: # BB#0: @@ -2926,9 +2926,9 @@ declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind rea define <16 x i8> @test_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_packuswb: ; GENERIC: # BB#0: -; GENERIC-NEXT: packuswb %xmm1, %xmm0 -; GENERIC-NEXT: packuswb (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: packuswb %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: packuswb (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_packuswb: ; ATOM: # BB#0: @@ -2984,9 +2984,9 @@ declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind rea define <16 x i8> @test_paddb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; GENERIC-LABEL: test_paddb: ; GENERIC: # BB#0: -; GENERIC-NEXT: paddb %xmm1, %xmm0 -; GENERIC-NEXT: paddb (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: paddb %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: paddb (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_paddb: ; ATOM: # BB#0: @@ -3036,9 +3036,9 @@ define <16 x i8> @test_paddb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { define <4 x i32> @test_paddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_paddd: ; GENERIC: # BB#0: -; GENERIC-NEXT: paddd %xmm1, %xmm0 -; GENERIC-NEXT: paddd (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: paddd (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_paddd: ; ATOM: # BB#0: @@ -3088,9 +3088,9 @@ define <4 x i32> @test_paddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { define <2 x i64> @test_paddq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; GENERIC-LABEL: test_paddq: ; GENERIC: # BB#0: -; GENERIC-NEXT: paddq %xmm1, %xmm0 -; GENERIC-NEXT: paddq (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: paddq (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_paddq: ; ATOM: # BB#0: @@ -3136,9 +3136,9 @@ define <2 x i64> @test_paddq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { define <16 x i8> @test_paddsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; GENERIC-LABEL: test_paddsb: ; GENERIC: # BB#0: -; GENERIC-NEXT: paddsb %xmm1, %xmm0 -; GENERIC-NEXT: paddsb (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: paddsb %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: paddsb (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_paddsb: ; ATOM: # BB#0: @@ -3189,9 +3189,9 @@ declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone define <8 x i16> @test_paddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_paddsw: ; GENERIC: # BB#0: -; GENERIC-NEXT: paddsw %xmm1, %xmm0 -; GENERIC-NEXT: paddsw (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: paddsw %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: paddsw (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_paddsw: ; ATOM: # BB#0: @@ -3242,9 +3242,9 @@ declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone define <16 x i8> @test_paddusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; GENERIC-LABEL: test_paddusb: ; GENERIC: # BB#0: -; GENERIC-NEXT: paddusb %xmm1, %xmm0 -; GENERIC-NEXT: paddusb (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: paddusb %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: paddusb (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_paddusb: ; ATOM: # BB#0: @@ -3295,9 +3295,9 @@ declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnon define <8 x i16> @test_paddusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_paddusw: ; GENERIC: # BB#0: -; GENERIC-NEXT: paddusw %xmm1, %xmm0 -; GENERIC-NEXT: paddusw (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: paddusw %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: paddusw (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_paddusw: ; ATOM: # BB#0: @@ -3348,9 +3348,9 @@ declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnon define <8 x i16> @test_paddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_paddw: ; GENERIC: # BB#0: -; GENERIC-NEXT: paddw %xmm1, %xmm0 -; GENERIC-NEXT: paddw (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: paddw (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_paddw: ; ATOM: # BB#0: @@ -3400,10 +3400,10 @@ define <8 x i16> @test_paddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; GENERIC-LABEL: test_pand: ; GENERIC: # BB#0: -; GENERIC-NEXT: pand %xmm1, %xmm0 -; GENERIC-NEXT: pand (%rdi), %xmm0 -; GENERIC-NEXT: paddq %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pand %xmm1, %xmm0 # sched: [1:0.33] +; GENERIC-NEXT: pand (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pand: ; ATOM: # BB#0: @@ -3456,12 +3456,12 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; GENERIC-LABEL: test_pandn: ; GENERIC: # BB#0: -; GENERIC-NEXT: pandn %xmm1, %xmm0 -; GENERIC-NEXT: movdqa %xmm0, %xmm1 -; GENERIC-NEXT: pandn (%rdi), %xmm1 -; GENERIC-NEXT: paddq %xmm0, %xmm1 -; GENERIC-NEXT: movdqa %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pandn %xmm1, %xmm0 # sched: [1:0.33] +; GENERIC-NEXT: movdqa %xmm0, %xmm1 # sched: [1:0.33] +; GENERIC-NEXT: pandn (%rdi), %xmm1 # sched: [7:0.50] +; GENERIC-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] +; GENERIC-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pandn: ; ATOM: # BB#0: @@ -3520,9 +3520,9 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { define <16 x i8> @test_pavgb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; GENERIC-LABEL: test_pavgb: ; GENERIC: # BB#0: -; GENERIC-NEXT: pavgb %xmm1, %xmm0 -; GENERIC-NEXT: pavgb (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pavgb %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: pavgb (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pavgb: ; ATOM: # BB#0: @@ -3573,9 +3573,9 @@ declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %arg0, <16 x i8> %arg1) nounwi define <8 x i16> @test_pavgw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_pavgw: ; GENERIC: # BB#0: -; GENERIC-NEXT: pavgw %xmm1, %xmm0 -; GENERIC-NEXT: pavgw (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pavgw %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: pavgw (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pavgw: ; ATOM: # BB#0: @@ -3626,10 +3626,10 @@ declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; GENERIC-LABEL: test_pcmpeqb: ; GENERIC: # BB#0: -; GENERIC-NEXT: pcmpeqb %xmm0, %xmm1 -; GENERIC-NEXT: pcmpeqb (%rdi), %xmm0 -; GENERIC-NEXT: por %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pcmpeqb %xmm0, %xmm1 # sched: [1:0.50] +; GENERIC-NEXT: pcmpeqb (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pcmpeqb: ; ATOM: # BB#0: @@ -3685,10 +3685,10 @@ define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_pcmpeqd: ; GENERIC: # BB#0: -; GENERIC-NEXT: pcmpeqd %xmm0, %xmm1 -; GENERIC-NEXT: pcmpeqd (%rdi), %xmm0 -; GENERIC-NEXT: por %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pcmpeqd %xmm0, %xmm1 # sched: [1:0.50] +; GENERIC-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pcmpeqd: ; ATOM: # BB#0: @@ -3744,10 +3744,10 @@ define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_pcmpeqw: ; GENERIC: # BB#0: -; GENERIC-NEXT: pcmpeqw %xmm0, %xmm1 -; GENERIC-NEXT: pcmpeqw (%rdi), %xmm0 -; GENERIC-NEXT: por %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pcmpeqw %xmm0, %xmm1 # sched: [1:0.50] +; GENERIC-NEXT: pcmpeqw (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pcmpeqw: ; ATOM: # BB#0: @@ -3803,11 +3803,11 @@ define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; GENERIC-LABEL: test_pcmpgtb: ; GENERIC: # BB#0: -; GENERIC-NEXT: movdqa %xmm0, %xmm2 -; GENERIC-NEXT: pcmpgtb %xmm1, %xmm2 -; GENERIC-NEXT: pcmpgtb (%rdi), %xmm0 -; GENERIC-NEXT: por %xmm2, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; GENERIC-NEXT: pcmpgtb %xmm1, %xmm2 # sched: [1:0.50] +; GENERIC-NEXT: pcmpgtb (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pcmpgtb: ; ATOM: # BB#0: @@ -3863,11 +3863,11 @@ define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_pcmpgtd: ; GENERIC: # BB#0: -; GENERIC-NEXT: movdqa %xmm0, %xmm2 -; GENERIC-NEXT: pcmpgtd %xmm1, %xmm2 -; GENERIC-NEXT: pcmpeqd (%rdi), %xmm0 -; GENERIC-NEXT: por %xmm2, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; GENERIC-NEXT: pcmpgtd %xmm1, %xmm2 # sched: [1:0.50] +; GENERIC-NEXT: pcmpeqd (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pcmpgtd: ; ATOM: # BB#0: @@ -3923,11 +3923,11 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_pcmpgtw: ; GENERIC: # BB#0: -; GENERIC-NEXT: movdqa %xmm0, %xmm2 -; GENERIC-NEXT: pcmpgtw %xmm1, %xmm2 -; GENERIC-NEXT: pcmpgtw (%rdi), %xmm0 -; GENERIC-NEXT: por %xmm2, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: movdqa %xmm0, %xmm2 # sched: [1:0.33] +; GENERIC-NEXT: pcmpgtw %xmm1, %xmm2 # sched: [1:0.50] +; GENERIC-NEXT: pcmpgtw (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: por %xmm2, %xmm0 # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pcmpgtw: ; ATOM: # BB#0: @@ -3983,9 +3983,9 @@ define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { define i16 @test_pextrw(<8 x i16> %a0) { ; GENERIC-LABEL: test_pextrw: ; GENERIC: # BB#0: -; GENERIC-NEXT: pextrw $6, %xmm0, %eax +; GENERIC-NEXT: pextrw $6, %xmm0, %eax # sched: [3:1.00] ; GENERIC-NEXT: # kill: %AX %AX %EAX -; GENERIC-NEXT: retq +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pextrw: ; ATOM: # BB#0: @@ -4029,9 +4029,9 @@ define i16 @test_pextrw(<8 x i16> %a0) { define <8 x i16> @test_pinsrw(<8 x i16> %a0, i16 %a1, i16 *%a2) { ; GENERIC-LABEL: test_pinsrw: ; GENERIC: # BB#0: -; GENERIC-NEXT: pinsrw $1, %edi, %xmm0 -; GENERIC-NEXT: pinsrw $3, (%rsi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pinsrw $1, %edi, %xmm0 # sched: [2:1.00] +; GENERIC-NEXT: pinsrw $3, (%rsi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pinsrw: ; ATOM: # BB#0: @@ -4081,9 +4081,9 @@ define <8 x i16> @test_pinsrw(<8 x i16> %a0, i16 %a1, i16 *%a2) { define <4 x i32> @test_pmaddwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_pmaddwd: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmaddwd %xmm1, %xmm0 -; GENERIC-NEXT: pmaddwd (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmaddwd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: pmaddwd (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pmaddwd: ; ATOM: # BB#0: @@ -4139,9 +4139,9 @@ declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnon define <8 x i16> @test_pmaxsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_pmaxsw: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmaxsw %xmm1, %xmm0 -; GENERIC-NEXT: pmaxsw (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmaxsw %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: pmaxsw (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pmaxsw: ; ATOM: # BB#0: @@ -4192,9 +4192,9 @@ declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone define <16 x i8> @test_pmaxub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; GENERIC-LABEL: test_pmaxub: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmaxub %xmm1, %xmm0 -; GENERIC-NEXT: pmaxub (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmaxub %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: pmaxub (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pmaxub: ; ATOM: # BB#0: @@ -4245,9 +4245,9 @@ declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone define <8 x i16> @test_pminsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_pminsw: ; GENERIC: # BB#0: -; GENERIC-NEXT: pminsw %xmm1, %xmm0 -; GENERIC-NEXT: pminsw (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pminsw %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: pminsw (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pminsw: ; ATOM: # BB#0: @@ -4298,9 +4298,9 @@ declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone define <16 x i8> @test_pminub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; GENERIC-LABEL: test_pminub: ; GENERIC: # BB#0: -; GENERIC-NEXT: pminub %xmm1, %xmm0 -; GENERIC-NEXT: pminub (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pminub %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: pminub (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pminub: ; ATOM: # BB#0: @@ -4351,8 +4351,8 @@ declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone define i32 @test_pmovmskb(<16 x i8> %a0) { ; GENERIC-LABEL: test_pmovmskb: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmovmskb %xmm0, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmovmskb %xmm0, %eax # sched: [2:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pmovmskb: ; ATOM: # BB#0: @@ -4393,9 +4393,9 @@ declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone define <8 x i16> @test_pmulhuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_pmulhuw: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmulhuw %xmm1, %xmm0 -; GENERIC-NEXT: pmulhuw (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmulhuw %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: pmulhuw (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pmulhuw: ; ATOM: # BB#0: @@ -4442,9 +4442,9 @@ declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnon define <8 x i16> @test_pmulhw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_pmulhw: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmulhw %xmm1, %xmm0 -; GENERIC-NEXT: pmulhw (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmulhw %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: pmulhw (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pmulhw: ; ATOM: # BB#0: @@ -4491,9 +4491,9 @@ declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone define <8 x i16> @test_pmullw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_pmullw: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmullw %xmm1, %xmm0 -; GENERIC-NEXT: pmullw (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmullw %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: pmullw (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pmullw: ; ATOM: # BB#0: @@ -4539,9 +4539,9 @@ define <8 x i16> @test_pmullw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { define <2 x i64> @test_pmuludq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_pmuludq: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmuludq %xmm1, %xmm0 -; GENERIC-NEXT: pmuludq (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmuludq %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: pmuludq (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pmuludq: ; ATOM: # BB#0: @@ -4597,10 +4597,10 @@ declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnon define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; GENERIC-LABEL: test_por: ; GENERIC: # BB#0: -; GENERIC-NEXT: por %xmm1, %xmm0 -; GENERIC-NEXT: por (%rdi), %xmm0 -; GENERIC-NEXT: paddq %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; GENERIC-NEXT: por (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_por: ; ATOM: # BB#0: @@ -4653,9 +4653,9 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { define <2 x i64> @test_psadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; GENERIC-LABEL: test_psadbw: ; GENERIC: # BB#0: -; GENERIC-NEXT: psadbw %xmm1, %xmm0 -; GENERIC-NEXT: psadbw (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: psadbw %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: psadbw (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psadbw: ; ATOM: # BB#0: @@ -4711,10 +4711,10 @@ declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) { ; GENERIC-LABEL: test_pshufd: ; GENERIC: # BB#0: -; GENERIC-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] -; GENERIC-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] -; GENERIC-NEXT: paddd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,0,3,2] sched: [1:0.50] +; GENERIC-NEXT: pshufd {{.*#+}} xmm0 = mem[3,2,1,0] sched: [7:0.50] +; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pshufd: ; ATOM: # BB#0: @@ -4769,10 +4769,10 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) { define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) { ; GENERIC-LABEL: test_pshufhw: ; GENERIC: # BB#0: -; GENERIC-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] -; GENERIC-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] -; GENERIC-NEXT: paddw %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pshufhw {{.*#+}} xmm1 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] +; GENERIC-NEXT: pshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50] +; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pshufhw: ; ATOM: # BB#0: @@ -4827,10 +4827,10 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) { define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) { ; GENERIC-LABEL: test_pshuflw: ; GENERIC: # BB#0: -; GENERIC-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] -; GENERIC-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] -; GENERIC-NEXT: paddw %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pshuflw {{.*#+}} xmm1 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] +; GENERIC-NEXT: pshuflw {{.*#+}} xmm0 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50] +; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pshuflw: ; ATOM: # BB#0: @@ -4885,10 +4885,10 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) { define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_pslld: ; GENERIC: # BB#0: -; GENERIC-NEXT: pslld %xmm1, %xmm0 -; GENERIC-NEXT: pslld (%rdi), %xmm0 -; GENERIC-NEXT: pslld $2, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pslld %xmm1, %xmm0 # sched: [2:1.00] +; GENERIC-NEXT: pslld (%rdi), %xmm0 # sched: [8:1.00] +; GENERIC-NEXT: pslld $2, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pslld: ; ATOM: # BB#0: @@ -4943,8 +4943,8 @@ declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone define <4 x i32> @test_pslldq(<4 x i32> %a0) { ; GENERIC-LABEL: test_pslldq: ; GENERIC: # BB#0: -; GENERIC-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] -; GENERIC-NEXT: retq +; GENERIC-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pslldq: ; ATOM: # BB#0: @@ -4988,10 +4988,10 @@ define <4 x i32> @test_pslldq(<4 x i32> %a0) { define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; GENERIC-LABEL: test_psllq: ; GENERIC: # BB#0: -; GENERIC-NEXT: psllq %xmm1, %xmm0 -; GENERIC-NEXT: psllq (%rdi), %xmm0 -; GENERIC-NEXT: psllq $2, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: psllq %xmm1, %xmm0 # sched: [2:1.00] +; GENERIC-NEXT: psllq (%rdi), %xmm0 # sched: [8:1.00] +; GENERIC-NEXT: psllq $2, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psllq: ; ATOM: # BB#0: @@ -5046,10 +5046,10 @@ declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_psllw: ; GENERIC: # BB#0: -; GENERIC-NEXT: psllw %xmm1, %xmm0 -; GENERIC-NEXT: psllw (%rdi), %xmm0 -; GENERIC-NEXT: psllw $2, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: psllw %xmm1, %xmm0 # sched: [2:1.00] +; GENERIC-NEXT: psllw (%rdi), %xmm0 # sched: [8:1.00] +; GENERIC-NEXT: psllw $2, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psllw: ; ATOM: # BB#0: @@ -5104,10 +5104,10 @@ declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_psrad: ; GENERIC: # BB#0: -; GENERIC-NEXT: psrad %xmm1, %xmm0 -; GENERIC-NEXT: psrad (%rdi), %xmm0 -; GENERIC-NEXT: psrad $2, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: psrad %xmm1, %xmm0 # sched: [2:1.00] +; GENERIC-NEXT: psrad (%rdi), %xmm0 # sched: [8:1.00] +; GENERIC-NEXT: psrad $2, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psrad: ; ATOM: # BB#0: @@ -5162,10 +5162,10 @@ declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_psraw: ; GENERIC: # BB#0: -; GENERIC-NEXT: psraw %xmm1, %xmm0 -; GENERIC-NEXT: psraw (%rdi), %xmm0 -; GENERIC-NEXT: psraw $2, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: psraw %xmm1, %xmm0 # sched: [2:1.00] +; GENERIC-NEXT: psraw (%rdi), %xmm0 # sched: [8:1.00] +; GENERIC-NEXT: psraw $2, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psraw: ; ATOM: # BB#0: @@ -5220,10 +5220,10 @@ declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_psrld: ; GENERIC: # BB#0: -; GENERIC-NEXT: psrld %xmm1, %xmm0 -; GENERIC-NEXT: psrld (%rdi), %xmm0 -; GENERIC-NEXT: psrld $2, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: psrld %xmm1, %xmm0 # sched: [2:1.00] +; GENERIC-NEXT: psrld (%rdi), %xmm0 # sched: [8:1.00] +; GENERIC-NEXT: psrld $2, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psrld: ; ATOM: # BB#0: @@ -5278,8 +5278,8 @@ declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone define <4 x i32> @test_psrldq(<4 x i32> %a0) { ; GENERIC-LABEL: test_psrldq: ; GENERIC: # BB#0: -; GENERIC-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero -; GENERIC-NEXT: retq +; GENERIC-NEXT: psrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psrldq: ; ATOM: # BB#0: @@ -5323,10 +5323,10 @@ define <4 x i32> @test_psrldq(<4 x i32> %a0) { define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; GENERIC-LABEL: test_psrlq: ; GENERIC: # BB#0: -; GENERIC-NEXT: psrlq %xmm1, %xmm0 -; GENERIC-NEXT: psrlq (%rdi), %xmm0 -; GENERIC-NEXT: psrlq $2, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: psrlq %xmm1, %xmm0 # sched: [2:1.00] +; GENERIC-NEXT: psrlq (%rdi), %xmm0 # sched: [8:1.00] +; GENERIC-NEXT: psrlq $2, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psrlq: ; ATOM: # BB#0: @@ -5381,10 +5381,10 @@ declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_psrlw: ; GENERIC: # BB#0: -; GENERIC-NEXT: psrlw %xmm1, %xmm0 -; GENERIC-NEXT: psrlw (%rdi), %xmm0 -; GENERIC-NEXT: psrlw $2, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: psrlw %xmm1, %xmm0 # sched: [2:1.00] +; GENERIC-NEXT: psrlw (%rdi), %xmm0 # sched: [8:1.00] +; GENERIC-NEXT: psrlw $2, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psrlw: ; ATOM: # BB#0: @@ -5439,9 +5439,9 @@ declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone define <16 x i8> @test_psubb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; GENERIC-LABEL: test_psubb: ; GENERIC: # BB#0: -; GENERIC-NEXT: psubb %xmm1, %xmm0 -; GENERIC-NEXT: psubb (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: psubb %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: psubb (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psubb: ; ATOM: # BB#0: @@ -5491,9 +5491,9 @@ define <16 x i8> @test_psubb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { define <4 x i32> @test_psubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_psubd: ; GENERIC: # BB#0: -; GENERIC-NEXT: psubd %xmm1, %xmm0 -; GENERIC-NEXT: psubd (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: psubd %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: psubd (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psubd: ; ATOM: # BB#0: @@ -5543,9 +5543,9 @@ define <4 x i32> @test_psubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { define <2 x i64> @test_psubq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; GENERIC-LABEL: test_psubq: ; GENERIC: # BB#0: -; GENERIC-NEXT: psubq %xmm1, %xmm0 -; GENERIC-NEXT: psubq (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: psubq %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: psubq (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psubq: ; ATOM: # BB#0: @@ -5591,9 +5591,9 @@ define <2 x i64> @test_psubq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { define <16 x i8> @test_psubsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; GENERIC-LABEL: test_psubsb: ; GENERIC: # BB#0: -; GENERIC-NEXT: psubsb %xmm1, %xmm0 -; GENERIC-NEXT: psubsb (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: psubsb %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: psubsb (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psubsb: ; ATOM: # BB#0: @@ -5644,9 +5644,9 @@ declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone define <8 x i16> @test_psubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_psubsw: ; GENERIC: # BB#0: -; GENERIC-NEXT: psubsw %xmm1, %xmm0 -; GENERIC-NEXT: psubsw (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: psubsw %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: psubsw (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psubsw: ; ATOM: # BB#0: @@ -5697,9 +5697,9 @@ declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone define <16 x i8> @test_psubusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; GENERIC-LABEL: test_psubusb: ; GENERIC: # BB#0: -; GENERIC-NEXT: psubusb %xmm1, %xmm0 -; GENERIC-NEXT: psubusb (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: psubusb %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: psubusb (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psubusb: ; ATOM: # BB#0: @@ -5750,9 +5750,9 @@ declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnon define <8 x i16> @test_psubusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_psubusw: ; GENERIC: # BB#0: -; GENERIC-NEXT: psubusw %xmm1, %xmm0 -; GENERIC-NEXT: psubusw (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: psubusw %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: psubusw (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psubusw: ; ATOM: # BB#0: @@ -5803,9 +5803,9 @@ declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnon define <8 x i16> @test_psubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_psubw: ; GENERIC: # BB#0: -; GENERIC-NEXT: psubw %xmm1, %xmm0 -; GENERIC-NEXT: psubw (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: psubw %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: psubw (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psubw: ; ATOM: # BB#0: @@ -5855,9 +5855,9 @@ define <8 x i16> @test_psubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { define <16 x i8> @test_punpckhbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; GENERIC-LABEL: test_punpckhbw: ; GENERIC: # BB#0: -; GENERIC-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] -; GENERIC-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] -; GENERIC-NEXT: retq +; GENERIC-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] +; GENERIC-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_punpckhbw: ; ATOM: # BB#0: @@ -5907,10 +5907,10 @@ define <16 x i8> @test_punpckhbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_punpckhdq: ; GENERIC: # BB#0: -; GENERIC-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; GENERIC-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] -; GENERIC-NEXT: paddd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] +; GENERIC-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50] +; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_punpckhdq: ; ATOM: # BB#0: @@ -5965,10 +5965,10 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; GENERIC-LABEL: test_punpckhqdq: ; GENERIC: # BB#0: -; GENERIC-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] -; GENERIC-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] -; GENERIC-NEXT: paddq %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] +; GENERIC-NEXT: punpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] +; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_punpckhqdq: ; ATOM: # BB#0: @@ -6021,9 +6021,9 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) define <8 x i16> @test_punpckhwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_punpckhwd: ; GENERIC: # BB#0: -; GENERIC-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; GENERIC-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] -; GENERIC-NEXT: retq +; GENERIC-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] +; GENERIC-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_punpckhwd: ; ATOM: # BB#0: @@ -6073,9 +6073,9 @@ define <8 x i16> @test_punpckhwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { define <16 x i8> @test_punpcklbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; GENERIC-LABEL: test_punpcklbw: ; GENERIC: # BB#0: -; GENERIC-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] -; GENERIC-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] -; GENERIC-NEXT: retq +; GENERIC-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] +; GENERIC-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_punpcklbw: ; ATOM: # BB#0: @@ -6125,10 +6125,10 @@ define <16 x i8> @test_punpcklbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_punpckldq: ; GENERIC: # BB#0: -; GENERIC-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] -; GENERIC-NEXT: paddd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] +; GENERIC-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] +; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_punpckldq: ; ATOM: # BB#0: @@ -6183,10 +6183,10 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; GENERIC-LABEL: test_punpcklqdq: ; GENERIC: # BB#0: -; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] -; GENERIC-NEXT: paddq %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] +; GENERIC-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] +; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_punpcklqdq: ; ATOM: # BB#0: @@ -6239,9 +6239,9 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) define <8 x i16> @test_punpcklwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_punpcklwd: ; GENERIC: # BB#0: -; GENERIC-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] -; GENERIC-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] -; GENERIC-NEXT: retq +; GENERIC-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] +; GENERIC-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_punpcklwd: ; ATOM: # BB#0: @@ -6291,10 +6291,10 @@ define <8 x i16> @test_punpcklwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; GENERIC-LABEL: test_pxor: ; GENERIC: # BB#0: -; GENERIC-NEXT: pxor %xmm1, %xmm0 -; GENERIC-NEXT: pxor (%rdi), %xmm0 -; GENERIC-NEXT: paddq %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pxor %xmm1, %xmm0 # sched: [1:0.33] +; GENERIC-NEXT: pxor (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pxor: ; ATOM: # BB#0: @@ -6347,10 +6347,10 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_shufpd: ; GENERIC: # BB#0: -; GENERIC-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] -; GENERIC-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] -; GENERIC-NEXT: addpd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] +; GENERIC-NEXT: shufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] +; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_shufpd: ; ATOM: # BB#0: @@ -6403,10 +6403,10 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { ; GENERIC-LABEL: test_sqrtpd: ; GENERIC: # BB#0: -; GENERIC-NEXT: sqrtpd %xmm0, %xmm1 -; GENERIC-NEXT: sqrtpd (%rdi), %xmm0 -; GENERIC-NEXT: addpd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: sqrtpd %xmm0, %xmm1 # sched: [22:1.00] +; GENERIC-NEXT: sqrtpd (%rdi), %xmm0 # sched: [28:1.00] +; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_sqrtpd: ; ATOM: # BB#0: @@ -6463,11 +6463,11 @@ declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; GENERIC-LABEL: test_sqrtsd: ; GENERIC: # BB#0: -; GENERIC-NEXT: sqrtsd %xmm0, %xmm0 -; GENERIC-NEXT: movapd (%rdi), %xmm1 -; GENERIC-NEXT: sqrtsd %xmm1, %xmm1 -; GENERIC-NEXT: addpd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: sqrtsd %xmm0, %xmm0 # sched: [22:1.00] +; GENERIC-NEXT: movapd (%rdi), %xmm1 # sched: [6:0.50] +; GENERIC-NEXT: sqrtsd %xmm1, %xmm1 # sched: [22:1.00] +; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_sqrtsd: ; ATOM: # BB#0: @@ -6527,9 +6527,9 @@ declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone define <2 x double> @test_subpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_subpd: ; GENERIC: # BB#0: -; GENERIC-NEXT: subpd %xmm1, %xmm0 -; GENERIC-NEXT: subpd (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: subpd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: subpd (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_subpd: ; ATOM: # BB#0: @@ -6575,9 +6575,9 @@ define <2 x double> @test_subpd(<2 x double> %a0, <2 x double> %a1, <2 x double> define double @test_subsd(double %a0, double %a1, double *%a2) { ; GENERIC-LABEL: test_subsd: ; GENERIC: # BB#0: -; GENERIC-NEXT: subsd %xmm1, %xmm0 -; GENERIC-NEXT: subsd (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: subsd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: subsd (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_subsd: ; ATOM: # BB#0: @@ -6623,17 +6623,17 @@ define double @test_subsd(double %a0, double %a1, double *%a2) { define i32 @test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_ucomisd: ; GENERIC: # BB#0: -; GENERIC-NEXT: ucomisd %xmm1, %xmm0 -; GENERIC-NEXT: setnp %al -; GENERIC-NEXT: sete %cl -; GENERIC-NEXT: andb %al, %cl -; GENERIC-NEXT: ucomisd (%rdi), %xmm0 -; GENERIC-NEXT: setnp %al -; GENERIC-NEXT: sete %dl -; GENERIC-NEXT: andb %al, %dl -; GENERIC-NEXT: orb %cl, %dl -; GENERIC-NEXT: movzbl %dl, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: ucomisd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: setnp %al # sched: [1:1.00] +; GENERIC-NEXT: sete %cl # sched: [1:1.00] +; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33] +; GENERIC-NEXT: ucomisd (%rdi), %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: setnp %al # sched: [1:1.00] +; GENERIC-NEXT: sete %dl # sched: [1:1.00] +; GENERIC-NEXT: andb %al, %dl # sched: [1:0.33] +; GENERIC-NEXT: orb %cl, %dl # sched: [1:0.33] +; GENERIC-NEXT: movzbl %dl, %eax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_ucomisd: ; ATOM: # BB#0: @@ -6729,10 +6729,10 @@ declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readn define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_unpckhpd: ; GENERIC: # BB#0: -; GENERIC-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] -; GENERIC-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] -; GENERIC-NEXT: addpd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] +; GENERIC-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] +; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_unpckhpd: ; ATOM: # BB#0: @@ -6785,12 +6785,12 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_unpcklpd: ; GENERIC: # BB#0: -; GENERIC-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; GENERIC-NEXT: movapd %xmm0, %xmm1 -; GENERIC-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] -; GENERIC-NEXT: addpd %xmm0, %xmm1 -; GENERIC-NEXT: movapd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] +; GENERIC-NEXT: movapd %xmm0, %xmm1 # sched: [1:1.00] +; GENERIC-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; GENERIC-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] +; GENERIC-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_unpcklpd: ; ATOM: # BB#0: @@ -6847,10 +6847,10 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_xorpd: ; GENERIC: # BB#0: -; GENERIC-NEXT: xorpd %xmm1, %xmm0 -; GENERIC-NEXT: xorpd (%rdi), %xmm0 -; GENERIC-NEXT: addpd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: xorpd %xmm1, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: xorpd (%rdi), %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_xorpd: ; ATOM: # BB#0: diff --git a/test/CodeGen/X86/sse3-schedule.ll b/test/CodeGen/X86/sse3-schedule.ll index cee02a74fd6..11f1e99ce01 100644 --- a/test/CodeGen/X86/sse3-schedule.ll +++ b/test/CodeGen/X86/sse3-schedule.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+sse3 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse3 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY @@ -12,9 +12,9 @@ define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_addsubpd: ; GENERIC: # BB#0: -; GENERIC-NEXT: addsubpd %xmm1, %xmm0 -; GENERIC-NEXT: addsubpd (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: addsubpd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: addsubpd (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_addsubpd: ; ATOM: # BB#0: @@ -61,9 +61,9 @@ declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwi define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_addsubps: ; GENERIC: # BB#0: -; GENERIC-NEXT: addsubps %xmm1, %xmm0 -; GENERIC-NEXT: addsubps (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: addsubps %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: addsubps (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_addsubps: ; ATOM: # BB#0: @@ -110,9 +110,9 @@ declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_haddpd: ; GENERIC: # BB#0: -; GENERIC-NEXT: haddpd %xmm1, %xmm0 -; GENERIC-NEXT: haddpd (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: haddpd %xmm1, %xmm0 # sched: [5:2.00] +; GENERIC-NEXT: haddpd (%rdi), %xmm0 # sched: [11:2.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_haddpd: ; ATOM: # BB#0: @@ -159,9 +159,9 @@ declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_haddps: ; GENERIC: # BB#0: -; GENERIC-NEXT: haddps %xmm1, %xmm0 -; GENERIC-NEXT: haddps (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: haddps %xmm1, %xmm0 # sched: [5:2.00] +; GENERIC-NEXT: haddps (%rdi), %xmm0 # sched: [11:2.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_haddps: ; ATOM: # BB#0: @@ -208,9 +208,9 @@ declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind re define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_hsubpd: ; GENERIC: # BB#0: -; GENERIC-NEXT: hsubpd %xmm1, %xmm0 -; GENERIC-NEXT: hsubpd (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: hsubpd %xmm1, %xmm0 # sched: [5:2.00] +; GENERIC-NEXT: hsubpd (%rdi), %xmm0 # sched: [11:2.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_hsubpd: ; ATOM: # BB#0: @@ -257,9 +257,9 @@ declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_hsubps: ; GENERIC: # BB#0: -; GENERIC-NEXT: hsubps %xmm1, %xmm0 -; GENERIC-NEXT: hsubps (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: hsubps %xmm1, %xmm0 # sched: [5:2.00] +; GENERIC-NEXT: hsubps (%rdi), %xmm0 # sched: [11:2.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_hsubps: ; ATOM: # BB#0: @@ -306,8 +306,8 @@ declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind re define <16 x i8> @test_lddqu(i8* %a0) { ; GENERIC-LABEL: test_lddqu: ; GENERIC: # BB#0: -; GENERIC-NEXT: lddqu (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: lddqu (%rdi), %xmm0 # sched: [6:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_lddqu: ; ATOM: # BB#0: @@ -348,10 +348,10 @@ declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) { ; GENERIC-LABEL: test_movddup: ; GENERIC: # BB#0: -; GENERIC-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] -; GENERIC-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] -; GENERIC-NEXT: addpd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: movddup {{.*#+}} xmm1 = xmm0[0,0] sched: [1:1.00] +; GENERIC-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] sched: [6:0.50] +; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movddup: ; ATOM: # BB#0: @@ -405,10 +405,10 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) { define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) { ; GENERIC-LABEL: test_movshdup: ; GENERIC: # BB#0: -; GENERIC-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; GENERIC-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] -; GENERIC-NEXT: addps %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] sched: [1:1.00] +; GENERIC-NEXT: movshdup {{.*#+}} xmm0 = mem[1,1,3,3] sched: [6:0.50] +; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movshdup: ; ATOM: # BB#0: @@ -462,10 +462,10 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) { define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) { ; GENERIC-LABEL: test_movsldup: ; GENERIC: # BB#0: -; GENERIC-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] -; GENERIC-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] -; GENERIC-NEXT: addps %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: movsldup {{.*#+}} xmm1 = xmm0[0,0,2,2] sched: [1:1.00] +; GENERIC-NEXT: movsldup {{.*#+}} xmm0 = mem[0,0,2,2] sched: [6:0.50] +; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movsldup: ; ATOM: # BB#0: diff --git a/test/CodeGen/X86/sse41-schedule.ll b/test/CodeGen/X86/sse41-schedule.ll index dff1dcbabe9..d5b35226817 100644 --- a/test/CodeGen/X86/sse41-schedule.ll +++ b/test/CodeGen/X86/sse41-schedule.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.1 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY @@ -11,10 +11,10 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_blendpd: ; GENERIC: # BB#0: -; GENERIC-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] -; GENERIC-NEXT: addpd %xmm1, %xmm0 -; GENERIC-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] -; GENERIC-NEXT: retq +; GENERIC-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00] +; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_blendpd: ; SLM: # BB#0: @@ -60,9 +60,9 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_blendps: ; GENERIC: # BB#0: -; GENERIC-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] -; GENERIC-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] -; GENERIC-NEXT: retq +; GENERIC-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:1.00] +; GENERIC-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_blendps: ; SLM: # BB#0: @@ -102,12 +102,12 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> * define <2 x double> @test_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, <2 x double> *%a3) { ; GENERIC-LABEL: test_blendvpd: ; GENERIC: # BB#0: -; GENERIC-NEXT: movapd %xmm0, %xmm3 -; GENERIC-NEXT: movaps %xmm2, %xmm0 -; GENERIC-NEXT: blendvpd %xmm0, %xmm1, %xmm3 -; GENERIC-NEXT: blendvpd %xmm0, (%rdi), %xmm3 -; GENERIC-NEXT: movapd %xmm3, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: movapd %xmm0, %xmm3 # sched: [1:1.00] +; GENERIC-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: blendvpd %xmm0, %xmm1, %xmm3 # sched: [2:2.00] +; GENERIC-NEXT: blendvpd %xmm0, (%rdi), %xmm3 # sched: [8:2.00] +; GENERIC-NEXT: movapd %xmm3, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_blendvpd: ; SLM: # BB#0: @@ -151,12 +151,12 @@ declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x d define <4 x float> @test_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, <4 x float> *%a3) { ; GENERIC-LABEL: test_blendvps: ; GENERIC: # BB#0: -; GENERIC-NEXT: movaps %xmm0, %xmm3 -; GENERIC-NEXT: movaps %xmm2, %xmm0 -; GENERIC-NEXT: blendvps %xmm0, %xmm1, %xmm3 -; GENERIC-NEXT: blendvps %xmm0, (%rdi), %xmm3 -; GENERIC-NEXT: movaps %xmm3, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: movaps %xmm0, %xmm3 # sched: [1:1.00] +; GENERIC-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: blendvps %xmm0, %xmm1, %xmm3 # sched: [2:2.00] +; GENERIC-NEXT: blendvps %xmm0, (%rdi), %xmm3 # sched: [8:2.00] +; GENERIC-NEXT: movaps %xmm3, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_blendvps: ; SLM: # BB#0: @@ -200,9 +200,9 @@ declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x floa define <2 x double> @test_dppd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_dppd: ; GENERIC: # BB#0: -; GENERIC-NEXT: dppd $7, %xmm1, %xmm0 -; GENERIC-NEXT: dppd $7, (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: dppd $7, %xmm1, %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: dppd $7, (%rdi), %xmm0 # sched: [15:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_dppd: ; SLM: # BB#0: @@ -243,9 +243,9 @@ declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwi define <4 x float> @test_dpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_dpps: ; GENERIC: # BB#0: -; GENERIC-NEXT: dpps $7, %xmm1, %xmm0 -; GENERIC-NEXT: dpps $7, (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: dpps $7, %xmm1, %xmm0 # sched: [12:2.00] +; GENERIC-NEXT: dpps $7, (%rdi), %xmm0 # sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_dpps: ; SLM: # BB#0: @@ -286,9 +286,9 @@ declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind define <4 x float> @test_insertps(<4 x float> %a0, <4 x float> %a1, float *%a2) { ; GENERIC-LABEL: test_insertps: ; GENERIC: # BB#0: -; GENERIC-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] -; GENERIC-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] -; GENERIC-NEXT: retq +; GENERIC-NEXT: insertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] +; GENERIC-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_insertps: ; SLM: # BB#0: @@ -329,8 +329,8 @@ declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounw define <2 x i64> @test_movntdqa(i8* %a0) { ; GENERIC-LABEL: test_movntdqa: ; GENERIC: # BB#0: -; GENERIC-NEXT: movntdqa (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: movntdqa (%rdi), %xmm0 # sched: [6:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_movntdqa: ; SLM: # BB#0: @@ -364,9 +364,9 @@ declare <2 x i64> @llvm.x86.sse41.movntdqa(i8*) nounwind readnone define <8 x i16> @test_mpsadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; GENERIC-LABEL: test_mpsadbw: ; GENERIC: # BB#0: -; GENERIC-NEXT: mpsadbw $7, %xmm1, %xmm0 -; GENERIC-NEXT: mpsadbw $7, (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: mpsadbw $7, %xmm1, %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: mpsadbw $7, (%rdi), %xmm0 # sched: [11:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_mpsadbw: ; SLM: # BB#0: @@ -408,9 +408,9 @@ declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind rea define <8 x i16> @test_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_packusdw: ; GENERIC: # BB#0: -; GENERIC-NEXT: packusdw %xmm1, %xmm0 -; GENERIC-NEXT: packusdw (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: packusdw %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: packusdw (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_packusdw: ; SLM: # BB#0: @@ -452,12 +452,12 @@ declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readno define <16 x i8> @test_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2, <16 x i8> *%a3) { ; GENERIC-LABEL: test_pblendvb: ; GENERIC: # BB#0: -; GENERIC-NEXT: movdqa %xmm0, %xmm3 -; GENERIC-NEXT: movaps %xmm2, %xmm0 -; GENERIC-NEXT: pblendvb %xmm0, %xmm1, %xmm3 -; GENERIC-NEXT: pblendvb %xmm0, (%rdi), %xmm3 -; GENERIC-NEXT: movdqa %xmm3, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: movdqa %xmm0, %xmm3 # sched: [1:0.33] +; GENERIC-NEXT: movaps %xmm2, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: pblendvb %xmm0, %xmm1, %xmm3 # sched: [8:1.00] +; GENERIC-NEXT: pblendvb %xmm0, (%rdi), %xmm3 # sched: [6:1.00] +; GENERIC-NEXT: movdqa %xmm3, %xmm0 # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pblendvb: ; SLM: # BB#0: @@ -501,9 +501,9 @@ declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) noun define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_pblendw: ; GENERIC: # BB#0: -; GENERIC-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] -; GENERIC-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] -; GENERIC-NEXT: retq +; GENERIC-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] +; GENERIC-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pblendw: ; SLM: # BB#0: @@ -543,9 +543,9 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { define <2 x i64> @test_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; GENERIC-LABEL: test_pcmpeqq: ; GENERIC: # BB#0: -; GENERIC-NEXT: pcmpeqq %xmm1, %xmm0 -; GENERIC-NEXT: pcmpeqq (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pcmpeqq %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: pcmpeqq (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pcmpeqq: ; SLM: # BB#0: @@ -587,9 +587,9 @@ define <2 x i64> @test_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { define i32 @test_pextrb(<16 x i8> %a0, i8 *%a1) { ; GENERIC-LABEL: test_pextrb: ; GENERIC: # BB#0: -; GENERIC-NEXT: pextrb $3, %xmm0, %eax -; GENERIC-NEXT: pextrb $1, %xmm0, (%rdi) -; GENERIC-NEXT: retq +; GENERIC-NEXT: pextrb $3, %xmm0, %eax # sched: [3:1.00] +; GENERIC-NEXT: pextrb $1, %xmm0, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pextrb: ; SLM: # BB#0: @@ -630,9 +630,9 @@ define i32 @test_pextrb(<16 x i8> %a0, i8 *%a1) { define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) { ; GENERIC-LABEL: test_pextrd: ; GENERIC: # BB#0: -; GENERIC-NEXT: pextrd $3, %xmm0, %eax -; GENERIC-NEXT: pextrd $1, %xmm0, (%rdi) -; GENERIC-NEXT: retq +; GENERIC-NEXT: pextrd $3, %xmm0, %eax # sched: [3:1.00] +; GENERIC-NEXT: pextrd $1, %xmm0, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pextrd: ; SLM: # BB#0: @@ -672,9 +672,9 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) { define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) { ; GENERIC-LABEL: test_pextrq: ; GENERIC: # BB#0: -; GENERIC-NEXT: pextrq $1, %xmm0, %rax -; GENERIC-NEXT: pextrq $1, %xmm0, (%rdi) -; GENERIC-NEXT: retq +; GENERIC-NEXT: pextrq $1, %xmm0, %rax # sched: [3:1.00] +; GENERIC-NEXT: pextrq $1, %xmm0, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pextrq: ; SLM: # BB#0: @@ -714,9 +714,9 @@ define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) { define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) { ; GENERIC-LABEL: test_pextrw: ; GENERIC: # BB#0: -; GENERIC-NEXT: pextrw $3, %xmm0, %eax -; GENERIC-NEXT: pextrw $1, %xmm0, (%rdi) -; GENERIC-NEXT: retq +; GENERIC-NEXT: pextrw $3, %xmm0, %eax # sched: [3:1.00] +; GENERIC-NEXT: pextrw $1, %xmm0, (%rdi) # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pextrw: ; SLM: # BB#0: @@ -757,9 +757,9 @@ define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) { define <8 x i16> @test_phminposuw(<8 x i16> *%a0) { ; GENERIC-LABEL: test_phminposuw: ; GENERIC: # BB#0: -; GENERIC-NEXT: phminposuw (%rdi), %xmm0 -; GENERIC-NEXT: phminposuw %xmm0, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: phminposuw (%rdi), %xmm0 # sched: [11:1.00] +; GENERIC-NEXT: phminposuw %xmm0, %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_phminposuw: ; SLM: # BB#0: @@ -800,9 +800,9 @@ declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone define <16 x i8> @test_pinsrb(<16 x i8> %a0, i8 %a1, i8 *%a2) { ; GENERIC-LABEL: test_pinsrb: ; GENERIC: # BB#0: -; GENERIC-NEXT: pinsrb $1, %edi, %xmm0 -; GENERIC-NEXT: pinsrb $3, (%rsi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pinsrb $1, %edi, %xmm0 # sched: [2:1.00] +; GENERIC-NEXT: pinsrb $3, (%rsi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pinsrb: ; SLM: # BB#0: @@ -842,9 +842,9 @@ define <16 x i8> @test_pinsrb(<16 x i8> %a0, i8 %a1, i8 *%a2) { define <4 x i32> @test_pinsrd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; GENERIC-LABEL: test_pinsrd: ; GENERIC: # BB#0: -; GENERIC-NEXT: pinsrd $1, %edi, %xmm0 -; GENERIC-NEXT: pinsrd $3, (%rsi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pinsrd $1, %edi, %xmm0 # sched: [2:1.00] +; GENERIC-NEXT: pinsrd $3, (%rsi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pinsrd: ; SLM: # BB#0: @@ -884,10 +884,10 @@ define <4 x i32> @test_pinsrd(<4 x i32> %a0, i32 %a1, i32 *%a2) { define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) { ; GENERIC-LABEL: test_pinsrq: ; GENERIC: # BB#0: -; GENERIC-NEXT: pinsrq $1, %rdi, %xmm0 -; GENERIC-NEXT: pinsrq $1, (%rsi), %xmm1 -; GENERIC-NEXT: paddq %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pinsrq $1, %rdi, %xmm0 # sched: [2:1.00] +; GENERIC-NEXT: pinsrq $1, (%rsi), %xmm1 # sched: [7:0.50] +; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pinsrq: ; SLM: # BB#0: @@ -933,9 +933,9 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) { define <16 x i8> @test_pmaxsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; GENERIC-LABEL: test_pmaxsb: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmaxsb %xmm1, %xmm0 -; GENERIC-NEXT: pmaxsb (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmaxsb %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: pmaxsb (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pmaxsb: ; SLM: # BB#0: @@ -976,9 +976,9 @@ declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone define <4 x i32> @test_pmaxsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_pmaxsd: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmaxsd %xmm1, %xmm0 -; GENERIC-NEXT: pmaxsd (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmaxsd %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: pmaxsd (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pmaxsd: ; SLM: # BB#0: @@ -1019,9 +1019,9 @@ declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone define <4 x i32> @test_pmaxud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_pmaxud: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmaxud %xmm1, %xmm0 -; GENERIC-NEXT: pmaxud (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmaxud %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: pmaxud (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pmaxud: ; SLM: # BB#0: @@ -1062,9 +1062,9 @@ declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone define <8 x i16> @test_pmaxuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_pmaxuw: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmaxuw %xmm1, %xmm0 -; GENERIC-NEXT: pmaxuw (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmaxuw %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: pmaxuw (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pmaxuw: ; SLM: # BB#0: @@ -1105,9 +1105,9 @@ declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone define <16 x i8> @test_pminsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; GENERIC-LABEL: test_pminsb: ; GENERIC: # BB#0: -; GENERIC-NEXT: pminsb %xmm1, %xmm0 -; GENERIC-NEXT: pminsb (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pminsb %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: pminsb (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pminsb: ; SLM: # BB#0: @@ -1148,9 +1148,9 @@ declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone define <4 x i32> @test_pminsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_pminsd: ; GENERIC: # BB#0: -; GENERIC-NEXT: pminsd %xmm1, %xmm0 -; GENERIC-NEXT: pminsd (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pminsd %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: pminsd (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pminsd: ; SLM: # BB#0: @@ -1191,9 +1191,9 @@ declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone define <4 x i32> @test_pminud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_pminud: ; GENERIC: # BB#0: -; GENERIC-NEXT: pminud %xmm1, %xmm0 -; GENERIC-NEXT: pminud (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pminud %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: pminud (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pminud: ; SLM: # BB#0: @@ -1234,9 +1234,9 @@ declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone define <8 x i16> @test_pminuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_pminuw: ; GENERIC: # BB#0: -; GENERIC-NEXT: pminuw %xmm1, %xmm0 -; GENERIC-NEXT: pminuw (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pminuw %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: pminuw (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pminuw: ; SLM: # BB#0: @@ -1277,10 +1277,10 @@ declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; GENERIC-LABEL: test_pmovsxbw: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmovsxbw %xmm0, %xmm1 -; GENERIC-NEXT: pmovsxbw (%rdi), %xmm0 -; GENERIC-NEXT: paddw %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmovsxbw %xmm0, %xmm1 # sched: [1:0.50] +; GENERIC-NEXT: pmovsxbw (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pmovsxbw: ; SLM: # BB#0: @@ -1328,10 +1328,10 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) { define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; GENERIC-LABEL: test_pmovsxbd: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmovsxbd %xmm0, %xmm1 -; GENERIC-NEXT: pmovsxbd (%rdi), %xmm0 -; GENERIC-NEXT: paddd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmovsxbd %xmm0, %xmm1 # sched: [1:0.50] +; GENERIC-NEXT: pmovsxbd (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pmovsxbd: ; SLM: # BB#0: @@ -1379,10 +1379,10 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) { define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; GENERIC-LABEL: test_pmovsxbq: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmovsxbq %xmm0, %xmm1 -; GENERIC-NEXT: pmovsxbq (%rdi), %xmm0 -; GENERIC-NEXT: paddq %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmovsxbq %xmm0, %xmm1 # sched: [1:0.50] +; GENERIC-NEXT: pmovsxbq (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pmovsxbq: ; SLM: # BB#0: @@ -1430,10 +1430,10 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) { define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; GENERIC-LABEL: test_pmovsxdq: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmovsxdq %xmm0, %xmm1 -; GENERIC-NEXT: pmovsxdq (%rdi), %xmm0 -; GENERIC-NEXT: paddq %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmovsxdq %xmm0, %xmm1 # sched: [1:0.50] +; GENERIC-NEXT: pmovsxdq (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pmovsxdq: ; SLM: # BB#0: @@ -1481,10 +1481,10 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) { define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; GENERIC-LABEL: test_pmovsxwd: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmovsxwd %xmm0, %xmm1 -; GENERIC-NEXT: pmovsxwd (%rdi), %xmm0 -; GENERIC-NEXT: paddd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmovsxwd %xmm0, %xmm1 # sched: [1:0.50] +; GENERIC-NEXT: pmovsxwd (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pmovsxwd: ; SLM: # BB#0: @@ -1532,10 +1532,10 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) { define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; GENERIC-LABEL: test_pmovsxwq: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmovsxwq %xmm0, %xmm1 -; GENERIC-NEXT: pmovsxwq (%rdi), %xmm0 -; GENERIC-NEXT: paddq %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmovsxwq %xmm0, %xmm1 # sched: [1:0.50] +; GENERIC-NEXT: pmovsxwq (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pmovsxwq: ; SLM: # BB#0: @@ -1583,10 +1583,10 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) { define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; GENERIC-LABEL: test_pmovzxbw: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero -; GENERIC-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero -; GENERIC-NEXT: paddw %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmovzxbw {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] +; GENERIC-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50] +; GENERIC-NEXT: paddw %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pmovzxbw: ; SLM: # BB#0: @@ -1634,10 +1634,10 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) { define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; GENERIC-LABEL: test_pmovzxbd: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero -; GENERIC-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero -; GENERIC-NEXT: paddd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] +; GENERIC-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50] +; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pmovzxbd: ; SLM: # BB#0: @@ -1685,10 +1685,10 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) { define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; GENERIC-LABEL: test_pmovzxbq: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero -; GENERIC-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero -; GENERIC-NEXT: paddq %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] +; GENERIC-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50] +; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pmovzxbq: ; SLM: # BB#0: @@ -1736,10 +1736,10 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) { define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; GENERIC-LABEL: test_pmovzxdq: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero -; GENERIC-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero -; GENERIC-NEXT: paddq %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50] +; GENERIC-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero sched: [7:0.50] +; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pmovzxdq: ; SLM: # BB#0: @@ -1787,10 +1787,10 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) { define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; GENERIC-LABEL: test_pmovzxwd: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; GENERIC-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero -; GENERIC-NEXT: paddd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmovzxwd {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] +; GENERIC-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50] +; GENERIC-NEXT: paddd %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pmovzxwd: ; SLM: # BB#0: @@ -1838,10 +1838,10 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) { define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; GENERIC-LABEL: test_pmovzxwq: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero -; GENERIC-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero -; GENERIC-NEXT: paddq %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50] +; GENERIC-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50] +; GENERIC-NEXT: paddq %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pmovzxwq: ; SLM: # BB#0: @@ -1889,9 +1889,9 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) { define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_pmuldq: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmuldq %xmm1, %xmm0 -; GENERIC-NEXT: pmuldq (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmuldq %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: pmuldq (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pmuldq: ; SLM: # BB#0: @@ -1933,9 +1933,9 @@ declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone define <4 x i32> @test_pmulld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_pmulld: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmulld %xmm1, %xmm0 -; GENERIC-NEXT: pmulld (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmulld %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: pmulld (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pmulld: ; SLM: # BB#0: @@ -1975,13 +1975,13 @@ define <4 x i32> @test_pmulld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; GENERIC-LABEL: test_ptest: ; GENERIC: # BB#0: -; GENERIC-NEXT: ptest %xmm1, %xmm0 -; GENERIC-NEXT: setb %al -; GENERIC-NEXT: ptest (%rdi), %xmm0 -; GENERIC-NEXT: setb %cl -; GENERIC-NEXT: andb %al, %cl -; GENERIC-NEXT: movzbl %cl, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: ptest %xmm1, %xmm0 # sched: [2:1.00] +; GENERIC-NEXT: setb %al # sched: [1:1.00] +; GENERIC-NEXT: ptest (%rdi), %xmm0 # sched: [8:1.00] +; GENERIC-NEXT: setb %cl # sched: [1:1.00] +; GENERIC-NEXT: andb %al, %cl # sched: [1:0.33] +; GENERIC-NEXT: movzbl %cl, %eax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_ptest: ; SLM: # BB#0: @@ -2043,10 +2043,10 @@ declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) { ; GENERIC-LABEL: test_roundpd: ; GENERIC: # BB#0: -; GENERIC-NEXT: roundpd $7, %xmm0, %xmm1 -; GENERIC-NEXT: roundpd $7, (%rdi), %xmm0 -; GENERIC-NEXT: addpd %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: roundpd $7, %xmm0, %xmm1 # sched: [3:1.00] +; GENERIC-NEXT: roundpd $7, (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_roundpd: ; SLM: # BB#0: @@ -2094,10 +2094,10 @@ declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readno define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) { ; GENERIC-LABEL: test_roundps: ; GENERIC: # BB#0: -; GENERIC-NEXT: roundps $7, %xmm0, %xmm1 -; GENERIC-NEXT: roundps $7, (%rdi), %xmm0 -; GENERIC-NEXT: addps %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: roundps $7, %xmm0, %xmm1 # sched: [3:1.00] +; GENERIC-NEXT: roundps $7, (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_roundps: ; SLM: # BB#0: @@ -2145,11 +2145,11 @@ declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_roundsd: ; GENERIC: # BB#0: -; GENERIC-NEXT: movaps %xmm0, %xmm2 -; GENERIC-NEXT: roundsd $7, %xmm1, %xmm2 -; GENERIC-NEXT: roundsd $7, (%rdi), %xmm0 -; GENERIC-NEXT: addpd %xmm2, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00] +; GENERIC-NEXT: roundsd $7, %xmm1, %xmm2 # sched: [3:1.00] +; GENERIC-NEXT: roundsd $7, (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_roundsd: ; SLM: # BB#0: @@ -2197,11 +2197,11 @@ declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) n define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; GENERIC-LABEL: test_roundss: ; GENERIC: # BB#0: -; GENERIC-NEXT: movaps %xmm0, %xmm2 -; GENERIC-NEXT: roundss $7, %xmm1, %xmm2 -; GENERIC-NEXT: roundss $7, (%rdi), %xmm0 -; GENERIC-NEXT: addps %xmm2, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: movaps %xmm0, %xmm2 # sched: [1:1.00] +; GENERIC-NEXT: roundss $7, %xmm1, %xmm2 # sched: [3:1.00] +; GENERIC-NEXT: roundss $7, (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: addps %xmm2, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_roundss: ; SLM: # BB#0: diff --git a/test/CodeGen/X86/sse42-schedule.ll b/test/CodeGen/X86/sse42-schedule.ll index fe6a50b3a49..73d6c722565 100644 --- a/test/CodeGen/X86/sse42-schedule.ll +++ b/test/CodeGen/X86/sse42-schedule.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4.2 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY @@ -11,10 +11,10 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) { ; GENERIC-LABEL: crc32_32_8: ; GENERIC: # BB#0: -; GENERIC-NEXT: crc32b %sil, %edi -; GENERIC-NEXT: crc32b (%rdx), %edi -; GENERIC-NEXT: movl %edi, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: crc32b %sil, %edi # sched: [3:1.00] +; GENERIC-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] +; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: crc32_32_8: ; SLM: # BB#0: @@ -60,10 +60,10 @@ declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) { ; GENERIC-LABEL: crc32_32_16: ; GENERIC: # BB#0: -; GENERIC-NEXT: crc32w %si, %edi -; GENERIC-NEXT: crc32w (%rdx), %edi -; GENERIC-NEXT: movl %edi, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: crc32w %si, %edi # sched: [3:1.00] +; GENERIC-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] +; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: crc32_32_16: ; SLM: # BB#0: @@ -109,10 +109,10 @@ declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) { ; GENERIC-LABEL: crc32_32_32: ; GENERIC: # BB#0: -; GENERIC-NEXT: crc32l %esi, %edi -; GENERIC-NEXT: crc32l (%rdx), %edi -; GENERIC-NEXT: movl %edi, %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: crc32l %esi, %edi # sched: [3:1.00] +; GENERIC-NEXT: crc32l (%rdx), %edi # sched: [7:1.00] +; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: crc32_32_32: ; SLM: # BB#0: @@ -158,10 +158,10 @@ declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind { ; GENERIC-LABEL: crc32_64_8: ; GENERIC: # BB#0: -; GENERIC-NEXT: crc32b %sil, %edi -; GENERIC-NEXT: crc32b (%rdx), %edi -; GENERIC-NEXT: movq %rdi, %rax -; GENERIC-NEXT: retq +; GENERIC-NEXT: crc32b %sil, %edi # sched: [3:1.00] +; GENERIC-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] +; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: crc32_64_8: ; SLM: # BB#0: @@ -207,10 +207,10 @@ declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) { ; GENERIC-LABEL: crc32_64_64: ; GENERIC: # BB#0: -; GENERIC-NEXT: crc32q %rsi, %rdi -; GENERIC-NEXT: crc32q (%rdx), %rdi -; GENERIC-NEXT: movq %rdi, %rax -; GENERIC-NEXT: retq +; GENERIC-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] +; GENERIC-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00] +; GENERIC-NEXT: movq %rdi, %rax # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: crc32_64_64: ; SLM: # BB#0: @@ -256,16 +256,16 @@ declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind define i32 @test_pcmpestri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; GENERIC-LABEL: test_pcmpestri: ; GENERIC: # BB#0: -; GENERIC-NEXT: movl $7, %eax -; GENERIC-NEXT: movl $7, %edx -; GENERIC-NEXT: pcmpestri $7, %xmm1, %xmm0 -; GENERIC-NEXT: movl %ecx, %esi -; GENERIC-NEXT: movl $7, %eax -; GENERIC-NEXT: movl $7, %edx -; GENERIC-NEXT: pcmpestri $7, (%rdi), %xmm0 +; GENERIC-NEXT: movl $7, %eax # sched: [1:0.33] +; GENERIC-NEXT: movl $7, %edx # sched: [1:0.33] +; GENERIC-NEXT: pcmpestri $7, %xmm1, %xmm0 # sched: [4:2.67] +; GENERIC-NEXT: movl %ecx, %esi # sched: [1:0.33] +; GENERIC-NEXT: movl $7, %eax # sched: [1:0.33] +; GENERIC-NEXT: movl $7, %edx # sched: [1:0.33] +; GENERIC-NEXT: pcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33] ; GENERIC-NEXT: # kill: %ECX %ECX %RCX -; GENERIC-NEXT: leal (%rcx,%rsi), %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pcmpestri: ; SLM: # BB#0: @@ -342,13 +342,13 @@ declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nou define <16 x i8> @test_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; GENERIC-LABEL: test_pcmpestrm: ; GENERIC: # BB#0: -; GENERIC-NEXT: movl $7, %eax -; GENERIC-NEXT: movl $7, %edx -; GENERIC-NEXT: pcmpestrm $7, %xmm1, %xmm0 -; GENERIC-NEXT: movl $7, %eax -; GENERIC-NEXT: movl $7, %edx -; GENERIC-NEXT: pcmpestrm $7, (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: movl $7, %eax # sched: [1:0.33] +; GENERIC-NEXT: movl $7, %edx # sched: [1:0.33] +; GENERIC-NEXT: pcmpestrm $7, %xmm1, %xmm0 # sched: [11:2.67] +; GENERIC-NEXT: movl $7, %eax # sched: [1:0.33] +; GENERIC-NEXT: movl $7, %edx # sched: [1:0.33] +; GENERIC-NEXT: pcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pcmpestrm: ; SLM: # BB#0: @@ -409,12 +409,12 @@ declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; GENERIC-LABEL: test_pcmpistri: ; GENERIC: # BB#0: -; GENERIC-NEXT: pcmpistri $7, %xmm1, %xmm0 -; GENERIC-NEXT: movl %ecx, %eax -; GENERIC-NEXT: pcmpistri $7, (%rdi), %xmm0 +; GENERIC-NEXT: pcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00] +; GENERIC-NEXT: movl %ecx, %eax # sched: [1:0.33] +; GENERIC-NEXT: pcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00] ; GENERIC-NEXT: # kill: %ECX %ECX %RCX -; GENERIC-NEXT: leal (%rcx,%rax), %eax -; GENERIC-NEXT: retq +; GENERIC-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pcmpistri: ; SLM: # BB#0: @@ -471,9 +471,9 @@ declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind read define <16 x i8> @test_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; GENERIC-LABEL: test_pcmpistrm: ; GENERIC: # BB#0: -; GENERIC-NEXT: pcmpistrm $7, %xmm1, %xmm0 -; GENERIC-NEXT: pcmpistrm $7, (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00] +; GENERIC-NEXT: pcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pcmpistrm: ; SLM: # BB#0: @@ -514,9 +514,9 @@ declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwin define <2 x i64> @test_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; GENERIC-LABEL: test_pcmpgtq: ; GENERIC: # BB#0: -; GENERIC-NEXT: pcmpgtq %xmm1, %xmm0 -; GENERIC-NEXT: pcmpgtq (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pcmpgtq %xmm1, %xmm0 # sched: [5:1.00] +; GENERIC-NEXT: pcmpgtq (%rdi), %xmm0 # sched: [11:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SLM-LABEL: test_pcmpgtq: ; SLM: # BB#0: diff --git a/test/CodeGen/X86/sse4a-schedule.ll b/test/CodeGen/X86/sse4a-schedule.ll index 4e0dfcf223a..649383b4266 100644 --- a/test/CodeGen/X86/sse4a-schedule.ll +++ b/test/CodeGen/X86/sse4a-schedule.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+sse4a | FileCheck %s --check-prefix=GENERIC +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+sse4a | FileCheck %s --check-prefix=GENERIC ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=BTVER2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=ZNVER1 @@ -7,7 +7,7 @@ define <2 x i64> @test_extrq(<2 x i64> %a0, <16 x i8> %a1) { ; GENERIC-LABEL: test_extrq: ; GENERIC: # BB#0: ; GENERIC-NEXT: extrq %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_extrq: ; BTVER2: # BB#0: @@ -27,7 +27,7 @@ define <2 x i64> @test_extrqi(<2 x i64> %a0) { ; GENERIC-LABEL: test_extrqi: ; GENERIC: # BB#0: ; GENERIC-NEXT: extrq $2, $3, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_extrqi: ; BTVER2: # BB#0: @@ -47,7 +47,7 @@ define <2 x i64> @test_insertq(<2 x i64> %a0, <2 x i64> %a1) { ; GENERIC-LABEL: test_insertq: ; GENERIC: # BB#0: ; GENERIC-NEXT: insertq %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_insertq: ; BTVER2: # BB#0: @@ -67,7 +67,7 @@ define <2 x i64> @test_insertqi(<2 x i64> %a0, <2 x i64> %a1) { ; GENERIC-LABEL: test_insertqi: ; GENERIC: # BB#0: ; GENERIC-NEXT: insertq $6, $5, %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_insertqi: ; BTVER2: # BB#0: @@ -86,8 +86,8 @@ declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) define void @test_movntsd(i8* %p, <2 x double> %a) { ; GENERIC-LABEL: test_movntsd: ; GENERIC: # BB#0: -; GENERIC-NEXT: movntsd %xmm0, (%rdi) -; GENERIC-NEXT: retq +; GENERIC-NEXT: movntsd %xmm0, (%rdi) # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movntsd: ; BTVER2: # BB#0: @@ -106,8 +106,8 @@ declare void @llvm.x86.sse4a.movnt.sd(i8*, <2 x double>) define void @test_movntss(i8* %p, <4 x float> %a) { ; GENERIC-LABEL: test_movntss: ; GENERIC: # BB#0: -; GENERIC-NEXT: movntss %xmm0, (%rdi) -; GENERIC-NEXT: retq +; GENERIC-NEXT: movntss %xmm0, (%rdi) # sched: [1:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movntss: ; BTVER2: # BB#0: diff --git a/test/CodeGen/X86/ssse3-schedule.ll b/test/CodeGen/X86/ssse3-schedule.ll index 2421e583944..e2e10bd964f 100644 --- a/test/CodeGen/X86/ssse3-schedule.ll +++ b/test/CodeGen/X86/ssse3-schedule.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+ssse3 | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=atom | FileCheck %s --check-prefix=CHECK --check-prefix=ATOM ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=sandybridge | FileCheck %s --check-prefix=CHECK --check-prefix=SANDY @@ -12,10 +12,10 @@ define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x i8> *%a1) { ; GENERIC-LABEL: test_pabsb: ; GENERIC: # BB#0: -; GENERIC-NEXT: pabsb %xmm0, %xmm1 -; GENERIC-NEXT: pabsb (%rdi), %xmm0 -; GENERIC-NEXT: por %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pabsb %xmm0, %xmm1 # sched: [1:0.50] +; GENERIC-NEXT: pabsb (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pabsb: ; ATOM: # BB#0: @@ -70,10 +70,10 @@ declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) { ; GENERIC-LABEL: test_pabsd: ; GENERIC: # BB#0: -; GENERIC-NEXT: pabsd %xmm0, %xmm1 -; GENERIC-NEXT: pabsd (%rdi), %xmm0 -; GENERIC-NEXT: por %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pabsd %xmm0, %xmm1 # sched: [1:0.50] +; GENERIC-NEXT: pabsd (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: por %xmm1, %xmm0 # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pabsd: ; ATOM: # BB#0: @@ -128,8 +128,8 @@ declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) { ; GENERIC-LABEL: test_pabsw: ; GENERIC: # BB#0: -; GENERIC-NEXT: pabsw %xmm0, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pabsw %xmm0, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pabsw: ; ATOM: # BB#0: @@ -177,10 +177,10 @@ declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone define <8 x i16> @test_palignr(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_palignr: ; GENERIC: # BB#0: -; GENERIC-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] -; GENERIC-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] -; GENERIC-NEXT: movdqa %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: palignr {{.*#+}} xmm1 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] +; GENERIC-NEXT: palignr {{.*#+}} xmm1 = mem[14,15],xmm1[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50] +; GENERIC-NEXT: movdqa %xmm1, %xmm0 # sched: [1:0.33] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_palignr: ; ATOM: # BB#0: @@ -230,9 +230,9 @@ define <8 x i16> @test_palignr(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { define <4 x i32> @test_phaddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_phaddd: ; GENERIC: # BB#0: -; GENERIC-NEXT: phaddd %xmm1, %xmm0 -; GENERIC-NEXT: phaddd (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: phaddd %xmm1, %xmm0 # sched: [3:1.50] +; GENERIC-NEXT: phaddd (%rdi), %xmm0 # sched: [9:1.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_phaddd: ; ATOM: # BB#0: @@ -279,9 +279,9 @@ declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind rea define <8 x i16> @test_phaddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_phaddsw: ; GENERIC: # BB#0: -; GENERIC-NEXT: phaddsw %xmm1, %xmm0 -; GENERIC-NEXT: phaddsw (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: phaddsw %xmm1, %xmm0 # sched: [3:1.50] +; GENERIC-NEXT: phaddsw (%rdi), %xmm0 # sched: [9:1.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_phaddsw: ; ATOM: # BB#0: @@ -336,9 +336,9 @@ declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind re define <8 x i16> @test_phaddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_phaddw: ; GENERIC: # BB#0: -; GENERIC-NEXT: phaddw %xmm1, %xmm0 -; GENERIC-NEXT: phaddw (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: phaddw %xmm1, %xmm0 # sched: [3:1.50] +; GENERIC-NEXT: phaddw (%rdi), %xmm0 # sched: [9:1.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_phaddw: ; ATOM: # BB#0: @@ -385,9 +385,9 @@ declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind rea define <4 x i32> @test_phsubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_phsubd: ; GENERIC: # BB#0: -; GENERIC-NEXT: phsubd %xmm1, %xmm0 -; GENERIC-NEXT: phsubd (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: phsubd %xmm1, %xmm0 # sched: [3:1.50] +; GENERIC-NEXT: phsubd (%rdi), %xmm0 # sched: [9:1.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_phsubd: ; ATOM: # BB#0: @@ -434,9 +434,9 @@ declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind rea define <8 x i16> @test_phsubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_phsubsw: ; GENERIC: # BB#0: -; GENERIC-NEXT: phsubsw %xmm1, %xmm0 -; GENERIC-NEXT: phsubsw (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: phsubsw %xmm1, %xmm0 # sched: [3:1.50] +; GENERIC-NEXT: phsubsw (%rdi), %xmm0 # sched: [9:1.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_phsubsw: ; ATOM: # BB#0: @@ -491,9 +491,9 @@ declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind re define <8 x i16> @test_phsubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_phsubw: ; GENERIC: # BB#0: -; GENERIC-NEXT: phsubw %xmm1, %xmm0 -; GENERIC-NEXT: phsubw (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: phsubw %xmm1, %xmm0 # sched: [3:1.50] +; GENERIC-NEXT: phsubw (%rdi), %xmm0 # sched: [9:1.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_phsubw: ; ATOM: # BB#0: @@ -540,9 +540,9 @@ declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind rea define <8 x i16> @test_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; GENERIC-LABEL: test_pmaddubsw: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmaddubsw %xmm1, %xmm0 -; GENERIC-NEXT: pmaddubsw (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmaddubsw %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: pmaddubsw (%rdi), %xmm0 # sched: [9:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pmaddubsw: ; ATOM: # BB#0: @@ -590,8 +590,8 @@ declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind define <8 x i16> @test_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_pmulhrsw: ; GENERIC: # BB#0: -; GENERIC-NEXT: pmulhrsw %xmm1, %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pmulhrsw %xmm1, %xmm0 # sched: [3:1.00] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pmulhrsw: ; ATOM: # BB#0: @@ -632,9 +632,9 @@ declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind define <16 x i8> @test_pshufb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; GENERIC-LABEL: test_pshufb: ; GENERIC: # BB#0: -; GENERIC-NEXT: pshufb %xmm1, %xmm0 -; GENERIC-NEXT: pshufb (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: pshufb %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: pshufb (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pshufb: ; ATOM: # BB#0: @@ -681,9 +681,9 @@ declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind rea define <16 x i8> @test_psignb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; GENERIC-LABEL: test_psignb: ; GENERIC: # BB#0: -; GENERIC-NEXT: psignb %xmm1, %xmm0 -; GENERIC-NEXT: psignb (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: psignb %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: psignb (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psignb: ; ATOM: # BB#0: @@ -738,9 +738,9 @@ declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind rea define <4 x i32> @test_psignd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; GENERIC-LABEL: test_psignd: ; GENERIC: # BB#0: -; GENERIC-NEXT: psignd %xmm1, %xmm0 -; GENERIC-NEXT: psignd (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: psignd %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: psignd (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psignd: ; ATOM: # BB#0: @@ -795,9 +795,9 @@ declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind rea define <8 x i16> @test_psignw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; GENERIC-LABEL: test_psignw: ; GENERIC: # BB#0: -; GENERIC-NEXT: psignw %xmm1, %xmm0 -; GENERIC-NEXT: psignw (%rdi), %xmm0 -; GENERIC-NEXT: retq +; GENERIC-NEXT: psignw %xmm1, %xmm0 # sched: [1:0.50] +; GENERIC-NEXT: psignw (%rdi), %xmm0 # sched: [7:0.50] +; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psignw: ; ATOM: # BB#0: -- 2.40.0