From fd3c664b13736ea5d26501c440d3317d4a48b80f Mon Sep 17 00:00:00 2001 From: Michael Zuckerman Date: Wed, 28 Jun 2017 11:23:31 +0000 Subject: [PATCH] Reverting commit 306414 on behalf of @gadi.haber git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@306532 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86SchedHaswell.td | 4603 ++++++----------- lib/Target/X86/X86SchedSandyBridge.td | 2308 +-------- test/CodeGen/X86/avx-schedule.ll | 808 +-- test/CodeGen/X86/avx2-intrinsics-x86.ll | 6 +- test/CodeGen/X86/avx2-schedule.ll | 58 +- test/CodeGen/X86/avx512-cmp.ll | 4 +- test/CodeGen/X86/avx512-cvt.ll | 54 +- test/CodeGen/X86/avx512-insert-extract.ll | 166 +- test/CodeGen/X86/avx512-intrinsics-upgrade.ll | 4 +- test/CodeGen/X86/avx512-mask-op.ll | 646 +-- test/CodeGen/X86/avx512-vec-cmp.ll | 97 +- .../X86/avx512bw-intrinsics-upgrade.ll | 1480 +++--- .../X86/avx512bwvl-intrinsics-upgrade.ll | 128 +- test/CodeGen/X86/bitcast-and-setcc-256.ll | 2 +- ...ractelement-legalization-store-ordering.ll | 48 +- test/CodeGen/X86/fp128-i128.ll | 2 +- test/CodeGen/X86/gather-addresses.ll | 17 +- test/CodeGen/X86/half.ll | 1045 +--- .../CodeGen/X86/illegal-bitfield-loadstore.ll | 34 +- test/CodeGen/X86/mul-constant-i32.ll | 206 +- test/CodeGen/X86/mul-constant-i64.ll | 132 +- test/CodeGen/X86/pr32329.ll | 2 +- test/CodeGen/X86/recip-fastmath.ll | 224 +- test/CodeGen/X86/recip-fastmath2.ll | 442 +- test/CodeGen/X86/sse-schedule.ll | 466 +- test/CodeGen/X86/sse2-schedule.ll | 1108 ++-- test/CodeGen/X86/sse3-schedule.ll | 88 +- test/CodeGen/X86/sse41-schedule.ll | 432 +- test/CodeGen/X86/sse42-schedule.ll | 70 +- test/CodeGen/X86/ssse3-schedule.ll | 134 +- test/CodeGen/X86/vector-shift-ashr-512.ll | 12 +- test/CodeGen/X86/vector-shuffle-512-v32.ll | 12 +- 32 files changed, 5280 insertions(+), 9558 deletions(-) diff --git a/lib/Target/X86/X86SchedHaswell.td b/lib/Target/X86/X86SchedHaswell.td index 8b9f3b9b2e6..03c8ccb53af 100644 --- a/lib/Target/X86/X86SchedHaswell.td +++ 
b/lib/Target/X86/X86SchedHaswell.td @@ -23,8 +23,8 @@ def HaswellModel : SchedMachineModel { // Based on the LSD (loop-stream detector) queue size and benchmarking data. let LoopMicroOpBufferSize = 50; - // This flag is set to allow the scheduler to assign a default model to - // unrecognized opcodes. + // FIXME: SSE4 and AVX are unimplemented. This flag is set to allow + // the scheduler to assign a default model to unrecognized opcodes. let CompleteModel = 0; } @@ -267,3251 +267,1914 @@ def : WriteRes { let Latency = 100; } def : WriteRes; def : WriteRes; -//////////////////////////////////////////////////////////////////////////////// -// Horizontal add/sub instructions. -//////////////////////////////////////////////////////////////////////////////// +//================ Exceptions ================// -// HADD, HSUB PS/PD -// x,x / v,v,v. -def : WriteRes { - let Latency = 5; - let NumMicroOps = 3; - let ResourceCycles = [1, 2]; -} +//-- Specific Scheduling Models --// -// x,m / v,v,m. -def : WriteRes { - let Latency = 9; - let NumMicroOps = 4; - let ResourceCycles = [1, 2, 1]; -} +// Starting with P0. +def WriteP0 : SchedWriteRes<[HWPort0]>; -// PHADD|PHSUB (S) W/D. -// v <- v,v. -def : WriteRes { - let Latency = 3; - let NumMicroOps = 3; - let ResourceCycles = [1, 2]; +def WriteP0_P1_Lat4 : SchedWriteRes<[HWPort0, HWPort1]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; } -// v <- v,m. -def : WriteRes { - let Latency = 6; + +def WriteP0_P1_Lat4Ld : SchedWriteRes<[HWPort0, HWPort1, HWPort23]> { + let Latency = 8; let NumMicroOps = 3; - let ResourceCycles = [1, 2, 1]; + let ResourceCycles = [1, 1, 1]; } -// Remaining instrs. 
+def WriteP01 : SchedWriteRes<[HWPort01]>; -def HWWriteResGroup0 : SchedWriteRes<[HWPort23]> { - let Latency = 0; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[HWWriteResGroup0], (instregex "LDDQUrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MMX_MOVD64from64rm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MMX_MOVD64rm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MMX_MOVD64to64rm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MMX_MOVQ64rm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOV32rm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOV64toPQIrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOV8rm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOVAPDrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOVAPSrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOVDDUPrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOVDI2PDIrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOVDQArm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOVDQUrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOVNTDQArm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOVSHDUPrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOVSLDUPrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOVSSrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOVSX32rm16")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOVSX32rm8")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOVUPDrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOVUPSrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOVZX32rm16")>; -def: InstRW<[HWWriteResGroup0], (instregex "MOVZX32rm8")>; -def: InstRW<[HWWriteResGroup0], (instregex "PREFETCHNTA")>; -def: InstRW<[HWWriteResGroup0], (instregex "PREFETCHT0")>; -def: InstRW<[HWWriteResGroup0], (instregex "PREFETCHT1")>; -def: InstRW<[HWWriteResGroup0], (instregex "PREFETCHT2")>; -def: InstRW<[HWWriteResGroup0], (instregex "VBROADCASTF128")>; -def: InstRW<[HWWriteResGroup0], (instregex "VBROADCASTI128")>; -def: 
InstRW<[HWWriteResGroup0], (instregex "VBROADCASTSDYrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VBROADCASTSSrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VBROADCASTSSrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VLDDQUYrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VLDDQUrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOV64toPQIrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVAPDYrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVAPDrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVAPSYrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVAPSrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVDDUPYrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVDDUPrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVDI2PDIrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVDQAYrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVDQArm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVDQUYrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVDQUrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVNTDQAYrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVNTDQArm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVQI2PQIrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVSDrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVSHDUPYrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVSHDUPrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVSLDUPYrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVSLDUPrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVSSrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVUPDYrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVUPDrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVUPSYrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VMOVUPSrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VPBROADCASTDYrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VPBROADCASTDrm")>; -def: InstRW<[HWWriteResGroup0], (instregex 
"VPBROADCASTQYrm")>; -def: InstRW<[HWWriteResGroup0], (instregex "VPBROADCASTQrm")>; - -def HWWriteResGroup1 : SchedWriteRes<[HWPort4,HWPort237]> { - let Latency = 0; +def Write2P01 : SchedWriteRes<[HWPort01]> { let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup1], (instregex "MMX_MOVD64from64rm")>; -def: InstRW<[HWWriteResGroup1], (instregex "MMX_MOVD64mr")>; -def: InstRW<[HWWriteResGroup1], (instregex "MMX_MOVNTQmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "MMX_MOVQ64mr")>; -def: InstRW<[HWWriteResGroup1], (instregex "MOV64mr")>; -def: InstRW<[HWWriteResGroup1], (instregex "MOV8mi")>; -def: InstRW<[HWWriteResGroup1], (instregex "MOV8mr")>; -def: InstRW<[HWWriteResGroup1], (instregex "MOVAPDmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "MOVAPSmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "MOVDQAmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "MOVDQUmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "MOVHPDmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "MOVHPSmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "MOVLPDmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "MOVLPSmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "MOVNTDQmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "MOVNTI_64mr")>; -def: InstRW<[HWWriteResGroup1], (instregex "MOVNTImr")>; -def: InstRW<[HWWriteResGroup1], (instregex "MOVNTPDmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "MOVNTPSmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "MOVPDI2DImr")>; -def: InstRW<[HWWriteResGroup1], (instregex "MOVPQI2QImr")>; -def: InstRW<[HWWriteResGroup1], (instregex "MOVPQIto64mr")>; -def: InstRW<[HWWriteResGroup1], (instregex "MOVSSmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "MOVUPDmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "MOVUPSmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VEXTRACTF128mr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VEXTRACTI128mr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVAPDYmr")>; -def: 
InstRW<[HWWriteResGroup1], (instregex "VMOVAPDmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVAPSYmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVAPSmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVDQAYmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVDQAmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVDQUYmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVDQUmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVHPDmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVHPSmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVLPDmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVLPSmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVNTDQYmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVNTDQmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVNTPDYmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVNTPDmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVNTPSYmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVNTPSmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVPDI2DImr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVPQI2QImr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVPQIto64mr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVSDmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVSSmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVUPDYmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVUPDmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVUPSYmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMOVUPSmr")>; -def: InstRW<[HWWriteResGroup1], (instregex "VMPTRSTm")>; - -def HWWriteResGroup2 : SchedWriteRes<[HWPort0]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; } -def: InstRW<[HWWriteResGroup2], (instregex "MMX_MOVD64from64rr")>; -def: InstRW<[HWWriteResGroup2], (instregex "MMX_MOVD64grr")>; -def: InstRW<[HWWriteResGroup2], (instregex "MMX_PMOVMSKBrr")>; -def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSLLDri")>; -def: 
InstRW<[HWWriteResGroup2], (instregex "MMX_PSLLDrr")>; -def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSLLQri")>; -def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSLLQrr")>; -def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSLLWri")>; -def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSLLWrr")>; -def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSRADri")>; -def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSRADrr")>; -def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSRAWri")>; -def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSRAWrr")>; -def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSRLDri")>; -def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSRLDrr")>; -def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSRLQri")>; -def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSRLQrr")>; -def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSRLWri")>; -def: InstRW<[HWWriteResGroup2], (instregex "MMX_PSRLWrr")>; -def: InstRW<[HWWriteResGroup2], (instregex "MOVPDI2DIrr")>; -def: InstRW<[HWWriteResGroup2], (instregex "MOVPQIto64rr")>; -def: InstRW<[HWWriteResGroup2], (instregex "PSLLDri")>; -def: InstRW<[HWWriteResGroup2], (instregex "PSLLQri")>; -def: InstRW<[HWWriteResGroup2], (instregex "PSLLWri")>; -def: InstRW<[HWWriteResGroup2], (instregex "PSRADri")>; -def: InstRW<[HWWriteResGroup2], (instregex "PSRAWri")>; -def: InstRW<[HWWriteResGroup2], (instregex "PSRLDri")>; -def: InstRW<[HWWriteResGroup2], (instregex "PSRLQri")>; -def: InstRW<[HWWriteResGroup2], (instregex "PSRLWri")>; -def: InstRW<[HWWriteResGroup2], (instregex "VMOVPDI2DIrr")>; -def: InstRW<[HWWriteResGroup2], (instregex "VMOVPQIto64rr")>; -def: InstRW<[HWWriteResGroup2], (instregex "VPSLLDri")>; -def: InstRW<[HWWriteResGroup2], (instregex "VPSLLDrm")>; -def: InstRW<[HWWriteResGroup2], (instregex "VPSLLQri")>; -def: InstRW<[HWWriteResGroup2], (instregex "VPSLLQrm")>; -def: InstRW<[HWWriteResGroup2], (instregex "VPSLLVQYrr")>; -def: InstRW<[HWWriteResGroup2], (instregex "VPSLLVQrr")>; -def: InstRW<[HWWriteResGroup2], 
(instregex "VPSLLWri")>; -def: InstRW<[HWWriteResGroup2], (instregex "VPSLLWrm")>; -def: InstRW<[HWWriteResGroup2], (instregex "VPSRADYri")>; -def: InstRW<[HWWriteResGroup2], (instregex "VPSRADri")>; -def: InstRW<[HWWriteResGroup2], (instregex "VPSRAWYri")>; -def: InstRW<[HWWriteResGroup2], (instregex "VPSRAWri")>; -def: InstRW<[HWWriteResGroup2], (instregex "VPSRLDYri")>; -def: InstRW<[HWWriteResGroup2], (instregex "VPSRLDri")>; -def: InstRW<[HWWriteResGroup2], (instregex "VPSRLQYri")>; -def: InstRW<[HWWriteResGroup2], (instregex "VPSRLQri")>; -def: InstRW<[HWWriteResGroup2], (instregex "VPSRLVQYrr")>; -def: InstRW<[HWWriteResGroup2], (instregex "VPSRLVQrr")>; -def: InstRW<[HWWriteResGroup2], (instregex "VPSRLWYri")>; -def: InstRW<[HWWriteResGroup2], (instregex "VPSRLWri")>; -def: InstRW<[HWWriteResGroup2], (instregex "VTESTPDYrr")>; -def: InstRW<[HWWriteResGroup2], (instregex "VTESTPDrr")>; -def: InstRW<[HWWriteResGroup2], (instregex "VTESTPSYrr")>; -def: InstRW<[HWWriteResGroup2], (instregex "VTESTPSrr")>; - -def HWWriteResGroup3 : SchedWriteRes<[HWPort1]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; +def Write3P01 : SchedWriteRes<[HWPort01]> { + let NumMicroOps = 3; } -def: InstRW<[HWWriteResGroup3], (instregex "MASKMOVDQU64")>; -def: InstRW<[HWWriteResGroup3], (instregex "MMX_MASKMOVQ64")>; -def HWWriteResGroup4 : SchedWriteRes<[HWPort5]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[HWWriteResGroup4], (instregex "ANDNPDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "ANDNPSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "ANDPDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "ANDPSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "INSERTPSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "KORTESTBrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "MMX_MOVD64rr")>; -def: InstRW<[HWWriteResGroup4], (instregex "MMX_MOVD64to64rr")>; -def: InstRW<[HWWriteResGroup4], (instregex 
"MMX_MOVQ2DQrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "MMX_PALIGNR64irr")>; -def: InstRW<[HWWriteResGroup4], (instregex "MMX_PSHUFBrr64")>; -def: InstRW<[HWWriteResGroup4], (instregex "MMX_PSHUFWri")>; -def: InstRW<[HWWriteResGroup4], (instregex "MMX_PUNPCKHBWirr")>; -def: InstRW<[HWWriteResGroup4], (instregex "MMX_PUNPCKHDQirr")>; -def: InstRW<[HWWriteResGroup4], (instregex "MMX_PUNPCKHWDirr")>; -def: InstRW<[HWWriteResGroup4], (instregex "MMX_PUNPCKLBWirr")>; -def: InstRW<[HWWriteResGroup4], (instregex "MMX_PUNPCKLDQirr")>; -def: InstRW<[HWWriteResGroup4], (instregex "MMX_PUNPCKLWDirr")>; -def: InstRW<[HWWriteResGroup4], (instregex "MOV64toPQIrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "MOVAPDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "MOVAPSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "MOVDDUPrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "MOVDI2PDIrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "MOVHLPSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "MOVLHPSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "MOVSDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "MOVSHDUPrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "MOVSLDUPrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "MOVSSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "MOVUPDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "MOVUPSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "ORPDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "ORPSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "PACKSSDWrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "PACKSSWBrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "PACKUSDWrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "PACKUSWBrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "PALIGNRrri")>; -def: InstRW<[HWWriteResGroup4], (instregex "PBLENDWrri")>; -def: InstRW<[HWWriteResGroup4], (instregex "PMOVSXBDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "PMOVSXBQrr")>; -def: InstRW<[HWWriteResGroup4], 
(instregex "PMOVSXBWrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "PMOVSXDQrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "PMOVSXWDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "PMOVSXWQrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "PMOVZXBDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "PMOVZXBQrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "PMOVZXBWrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "PMOVZXDQrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "PMOVZXWDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "PMOVZXWQrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "PSHUFBrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "PSHUFDri")>; -def: InstRW<[HWWriteResGroup4], (instregex "PSHUFHWri")>; -def: InstRW<[HWWriteResGroup4], (instregex "PSHUFLWri")>; -def: InstRW<[HWWriteResGroup4], (instregex "PSLLDQri")>; -def: InstRW<[HWWriteResGroup4], (instregex "PSRLDQri")>; -def: InstRW<[HWWriteResGroup4], (instregex "PUNPCKHBWrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "PUNPCKHDQrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "PUNPCKHQDQrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "PUNPCKHWDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "PUNPCKLBWrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "PUNPCKLDQrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "PUNPCKLQDQrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "PUNPCKLWDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "SHUFPDrri")>; -def: InstRW<[HWWriteResGroup4], (instregex "SHUFPSrri")>; -def: InstRW<[HWWriteResGroup4], (instregex "UNPCKHPDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "UNPCKHPSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "UNPCKLPDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "UNPCKLPSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VANDNPDYrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VANDNPDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VANDNPSYrr")>; -def: InstRW<[HWWriteResGroup4], (instregex 
"VANDNPSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VANDPDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VANDPDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VANDPSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VBROADCASTSSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VGATHERQPSZrm")>; -def: InstRW<[HWWriteResGroup4], (instregex "VINSERTPSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOV64toPQIrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOV64toPQIrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVAPDYrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVAPDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVAPSYrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVAPSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVDDUPYrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVDDUPrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVHLPSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVLHPSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVSDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVSHDUPYrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVSHDUPrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVSLDUPYrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVSLDUPrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVSSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVUPDYrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVUPDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVUPSYrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VMOVUPSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VORPDYrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VORPDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VORPSYrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VORPSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPACKSSDWYrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPACKSSDWrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPACKSSWBYrr")>; 
-def: InstRW<[HWWriteResGroup4], (instregex "VPACKSSWBrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPACKUSDWYrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPACKUSDWrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPACKUSWBYrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPACKUSWBrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPALIGNRYrri")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPALIGNRrri")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPBLENDWYrri")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPBLENDWrri")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPBROADCASTDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPBROADCASTQrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPERMILPDri")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPERMILPDrm")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPERMILPDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPERMILPSri")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPERMILPSrm")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPERMILPSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPERMILPSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPMOVSXBDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPMOVSXBQrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPMOVSXBWrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPMOVSXDQrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPMOVSXWDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPMOVSXWQrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPMOVZXBDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPMOVZXBQrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPMOVZXBWrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPMOVZXDQrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPMOVZXWDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPMOVZXWQrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPSHUFBrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPSHUFBrr")>; -def: InstRW<[HWWriteResGroup4], 
(instregex "VPSHUFDYri")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPSHUFDri")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPSHUFHWri")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPSHUFLWYri")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPSHUFLWri")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPSLLDQYri")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPSLLDQri")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPSRLDQYri")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPSRLDQri")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKHBWYrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKHBWrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKHDQYrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKHDQrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKHQDQrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKHWDYrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKHWDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKLBWrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKLDQYrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKLDQrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKLQDQYrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKLQDQrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKLWDYrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VPUNPCKLWDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VSHUFPDYrri")>; -def: InstRW<[HWWriteResGroup4], (instregex "VSHUFPDrri")>; -def: InstRW<[HWWriteResGroup4], (instregex "VSHUFPSYrri")>; -def: InstRW<[HWWriteResGroup4], (instregex "VSHUFPSrri")>; -def: InstRW<[HWWriteResGroup4], (instregex "VUNPCKHPDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VUNPCKHPSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VUNPCKLPDYrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VUNPCKLPDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VUNPCKLPSYrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VUNPCKLPSrr")>; -def: 
InstRW<[HWWriteResGroup4], (instregex "VXORPDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "VXORPSrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "XORPDrr")>; -def: InstRW<[HWWriteResGroup4], (instregex "XORPSrr")>; - -def HWWriteResGroup5 : SchedWriteRes<[HWPort6]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; +def WriteP015 : SchedWriteRes<[HWPort015]>; + +def WriteP01_P5 : SchedWriteRes<[HWPort01, HWPort5]> { + let NumMicroOps = 2; } -def: InstRW<[HWWriteResGroup5], (instregex "JMP64r")>; +def WriteP06 : SchedWriteRes<[HWPort06]>; -def HWWriteResGroup6 : SchedWriteRes<[HWPort01]> { +def Write2P06 : SchedWriteRes<[HWPort06]> { let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; + let NumMicroOps = 2; + let ResourceCycles = [2]; } -def: InstRW<[HWWriteResGroup6], (instregex "FINCSTP")>; -def: InstRW<[HWWriteResGroup6], (instregex "FNOP")>; -def HWWriteResGroup7 : SchedWriteRes<[HWPort0]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; +def Write3P06_Lat2 : SchedWriteRes<[HWPort06]> { + let Latency = 2; + let NumMicroOps = 3; + let ResourceCycles = [3]; } -def: InstRW<[HWWriteResGroup7], (instregex "BT32ri8")>; -def: InstRW<[HWWriteResGroup7], (instregex "BT32rr")>; -def: InstRW<[HWWriteResGroup7], (instregex "BTC32ri8")>; -def: InstRW<[HWWriteResGroup7], (instregex "BTC32rr")>; -def: InstRW<[HWWriteResGroup7], (instregex "BTR32ri8")>; -def: InstRW<[HWWriteResGroup7], (instregex "BTR32rr")>; -def: InstRW<[HWWriteResGroup7], (instregex "BTS32ri8")>; -def: InstRW<[HWWriteResGroup7], (instregex "BTS32rr")>; -def: InstRW<[HWWriteResGroup7], (instregex "CDQ")>; -def: InstRW<[HWWriteResGroup7], (instregex "CQO")>; -def: InstRW<[HWWriteResGroup7], (instregex "RORX32ri")>; -def: InstRW<[HWWriteResGroup7], (instregex "RORX64ri")>; -def: InstRW<[HWWriteResGroup7], (instregex "SAR32ri")>; -def: InstRW<[HWWriteResGroup7], (instregex "SAR64r1")>; -def: InstRW<[HWWriteResGroup7], (instregex "SAR8r1")>; 
-def: InstRW<[HWWriteResGroup7], (instregex "SAR8ri")>; -def: InstRW<[HWWriteResGroup7], (instregex "SARX32rr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SARX64rr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETAEr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETBr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETEr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETGEr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETGr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETLEr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETLr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETNEr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETNOr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETNPr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETNSr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETOr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETPr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SETSr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SHL32ri")>; -def: InstRW<[HWWriteResGroup7], (instregex "SHL64r1")>; -def: InstRW<[HWWriteResGroup7], (instregex "SHL8r1")>; -def: InstRW<[HWWriteResGroup7], (instregex "SHL8ri")>; -def: InstRW<[HWWriteResGroup7], (instregex "SHLX32rr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SHLX64rr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SHR32ri")>; -def: InstRW<[HWWriteResGroup7], (instregex "SHR64r1")>; -def: InstRW<[HWWriteResGroup7], (instregex "SHR8r1")>; -def: InstRW<[HWWriteResGroup7], (instregex "SHR8ri")>; -def: InstRW<[HWWriteResGroup7], (instregex "SHRX32rr")>; -def: InstRW<[HWWriteResGroup7], (instregex "SHRX64rr")>; - -def HWWriteResGroup8 : SchedWriteRes<[HWPort15]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; + +def WriteP0156_P23 : SchedWriteRes<[HWPort0156, HWPort23]> { + let NumMicroOps = 2; } -def: InstRW<[HWWriteResGroup8], (instregex "ANDN32rr")>; -def: InstRW<[HWWriteResGroup8], (instregex "ANDN64rr")>; -def: InstRW<[HWWriteResGroup8], 
(instregex "BLSI32rr")>; -def: InstRW<[HWWriteResGroup8], (instregex "BLSI64rr")>; -def: InstRW<[HWWriteResGroup8], (instregex "BLSMSK32rr")>; -def: InstRW<[HWWriteResGroup8], (instregex "BLSMSK64rr")>; -def: InstRW<[HWWriteResGroup8], (instregex "BLSR32rr")>; -def: InstRW<[HWWriteResGroup8], (instregex "BLSR64rr")>; -def: InstRW<[HWWriteResGroup8], (instregex "BZHI32rr")>; -def: InstRW<[HWWriteResGroup8], (instregex "BZHI64rr")>; -def: InstRW<[HWWriteResGroup8], (instregex "LEA64_32r")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PABSBrr64")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PABSDrr64")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PABSWrr64")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PADDBirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PADDDirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PADDQirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PADDSBirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PADDSWirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PADDUSBirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PADDUSWirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PADDWirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PAVGBirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PAVGWirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PCMPEQBirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PCMPEQDirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PCMPEQWirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PCMPGTBirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PCMPGTDirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PCMPGTWirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PMAXSWirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PMAXUBirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PMINSWirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PMINUBirr")>; -def: InstRW<[HWWriteResGroup8], (instregex 
"MMX_PSIGNBrr64")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PSIGNDrr64")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PSIGNWrr64")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PSUBBirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PSUBDirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PSUBQirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PSUBSBirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PSUBSWirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PSUBUSBirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PSUBUSWirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "MMX_PSUBWirr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PABSBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PABSDrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PABSWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PADDBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PADDDrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PADDQrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PADDSBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PADDSWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PADDUSBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PADDUSWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PADDWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PAVGBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PAVGWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PCMPEQBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PCMPEQDrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PCMPEQQrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PCMPEQWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PCMPGTBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PCMPGTDrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PCMPGTWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PMAXSBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PMAXSDrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PMAXSWrr")>; -def: 
InstRW<[HWWriteResGroup8], (instregex "PMAXUBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PMAXUDrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PMAXUWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PMINSBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PMINSDrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PMINSWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PMINUBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PMINUDrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PMINUWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PSIGNBrr128")>; -def: InstRW<[HWWriteResGroup8], (instregex "PSIGNDrr128")>; -def: InstRW<[HWWriteResGroup8], (instregex "PSIGNWrr128")>; -def: InstRW<[HWWriteResGroup8], (instregex "PSUBBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PSUBDrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PSUBQrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PSUBSBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PSUBSWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PSUBUSBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PSUBUSWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "PSUBWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VMASKMOVPSYrm")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPABSBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPABSBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPABSDYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPABSDrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPABSWYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPABSWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPADDBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPADDBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPADDDrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPADDDrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPADDQYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPADDQrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPADDSBrr")>; -def: 
InstRW<[HWWriteResGroup8], (instregex "VPADDSWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPADDUSBYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPADDUSBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPADDUSWYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPADDUSWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPADDWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPAVGBYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPAVGBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPAVGWYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPAVGWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPCMPEQBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPCMPEQBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPCMPEQDrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPCMPEQDrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPCMPEQQrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPCMPEQWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPCMPEQWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPCMPGTBYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPCMPGTBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPCMPGTDYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPCMPGTDrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPCMPGTWYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPCMPGTWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPMAXSBYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPMAXSBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPMAXSDYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPMAXSDrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPMAXSWYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPMAXSWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPMAXUBYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPMAXUBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPMAXUDYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPMAXUDrr")>; -def: InstRW<[HWWriteResGroup8], 
(instregex "VPMAXUWYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPMAXUWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPMINSBYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPMINSBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPMINSDYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPMINSDrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPMINSWYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPMINSWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPMINUBYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPMINUBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPMINUDYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPMINUDrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPMINUWYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPMINUWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPSIGNBYrr256")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPSIGNBrr128")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPSIGNDYrr256")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPSIGNDrr128")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPSIGNWYrr256")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPSIGNWrr128")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPSUBBYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPSUBBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPSUBDYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPSUBDrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPSUBQYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPSUBQrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPSUBSBYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPSUBSBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPSUBSWYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPSUBSWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPSUBUSBYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPSUBUSBrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPSUBUSWYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex 
"VPSUBUSWrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPSUBWYrr")>; -def: InstRW<[HWWriteResGroup8], (instregex "VPSUBWrr")>; - -def HWWriteResGroup9 : SchedWriteRes<[HWPort015]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; + +def Write2P0156_P23 : SchedWriteRes<[HWPort0156, HWPort23]> { + let NumMicroOps = 3; + let ResourceCycles = [2, 1]; } -def: InstRW<[HWWriteResGroup9], (instregex "BLENDPDrri")>; -def: InstRW<[HWWriteResGroup9], (instregex "BLENDPSrri")>; -def: InstRW<[HWWriteResGroup9], (instregex "MMX_MOVD64from64rr")>; -def: InstRW<[HWWriteResGroup9], (instregex "MMX_MOVQ64rr")>; -def: InstRW<[HWWriteResGroup9], (instregex "MMX_PANDNirr")>; -def: InstRW<[HWWriteResGroup9], (instregex "MMX_PANDirr")>; -def: InstRW<[HWWriteResGroup9], (instregex "MMX_PORirr")>; -def: InstRW<[HWWriteResGroup9], (instregex "MMX_PXORirr")>; -def: InstRW<[HWWriteResGroup9], (instregex "MOVDQArr")>; -def: InstRW<[HWWriteResGroup9], (instregex "MOVDQUrr")>; -def: InstRW<[HWWriteResGroup9], (instregex "MOVPQI2QIrr")>; -def: InstRW<[HWWriteResGroup9], (instregex "PANDNrr")>; -def: InstRW<[HWWriteResGroup9], (instregex "PANDrr")>; -def: InstRW<[HWWriteResGroup9], (instregex "PORrr")>; -def: InstRW<[HWWriteResGroup9], (instregex "PXORrr")>; -def: InstRW<[HWWriteResGroup9], (instregex "VBLENDPDYrri")>; -def: InstRW<[HWWriteResGroup9], (instregex "VBLENDPDrri")>; -def: InstRW<[HWWriteResGroup9], (instregex "VBLENDPSYrri")>; -def: InstRW<[HWWriteResGroup9], (instregex "VBLENDPSrri")>; -def: InstRW<[HWWriteResGroup9], (instregex "VMOVDQAYrr")>; -def: InstRW<[HWWriteResGroup9], (instregex "VMOVDQArr")>; -def: InstRW<[HWWriteResGroup9], (instregex "VMOVDQUYrr")>; -def: InstRW<[HWWriteResGroup9], (instregex "VMOVDQUrr")>; -def: InstRW<[HWWriteResGroup9], (instregex "VMOVPQI2QIrr")>; -def: InstRW<[HWWriteResGroup9], (instregex "VMOVZPQILo2PQIrr")>; -def: InstRW<[HWWriteResGroup9], (instregex "VPANDNYrr")>; -def: InstRW<[HWWriteResGroup9], (instregex 
"VPANDNrr")>; -def: InstRW<[HWWriteResGroup9], (instregex "VPANDYrr")>; -def: InstRW<[HWWriteResGroup9], (instregex "VPANDrr")>; -def: InstRW<[HWWriteResGroup9], (instregex "VPBLENDDYrri")>; -def: InstRW<[HWWriteResGroup9], (instregex "VPBLENDDrri")>; -def: InstRW<[HWWriteResGroup9], (instregex "VPORYrr")>; -def: InstRW<[HWWriteResGroup9], (instregex "VPORrr")>; -def: InstRW<[HWWriteResGroup9], (instregex "VPXORYrr")>; -def: InstRW<[HWWriteResGroup9], (instregex "VPXORrr")>; - -def HWWriteResGroup10 : SchedWriteRes<[HWPort0156]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; + +def Write2P0156_Lat2 : SchedWriteRes<[HWPort0156]> { + let Latency = 2; + let ResourceCycles = [2]; } -def: InstRW<[HWWriteResGroup10], (instregex "ADD32ri8")>; -def: InstRW<[HWWriteResGroup10], (instregex "ADD32rr")>; -def: InstRW<[HWWriteResGroup10], (instregex "ADD8ri")>; -def: InstRW<[HWWriteResGroup10], (instregex "ADD8rr")>; -def: InstRW<[HWWriteResGroup10], (instregex "AND32ri")>; -def: InstRW<[HWWriteResGroup10], (instregex "AND64ri8")>; -def: InstRW<[HWWriteResGroup10], (instregex "AND64rr")>; -def: InstRW<[HWWriteResGroup10], (instregex "AND8ri")>; -def: InstRW<[HWWriteResGroup10], (instregex "AND8rr")>; -def: InstRW<[HWWriteResGroup10], (instregex "CBW")>; -def: InstRW<[HWWriteResGroup10], (instregex "CLC")>; -def: InstRW<[HWWriteResGroup10], (instregex "CMC")>; -def: InstRW<[HWWriteResGroup10], (instregex "CMP16ri8")>; -def: InstRW<[HWWriteResGroup10], (instregex "CMP32i32")>; -def: InstRW<[HWWriteResGroup10], (instregex "CMP64rr")>; -def: InstRW<[HWWriteResGroup10], (instregex "CMP8ri")>; -def: InstRW<[HWWriteResGroup10], (instregex "CMP8rr")>; -def: InstRW<[HWWriteResGroup10], (instregex "CWDE")>; -def: InstRW<[HWWriteResGroup10], (instregex "DEC64r")>; -def: InstRW<[HWWriteResGroup10], (instregex "DEC8r")>; -def: InstRW<[HWWriteResGroup10], (instregex "INC64r")>; -def: InstRW<[HWWriteResGroup10], (instregex "INC8r")>; -def: InstRW<[HWWriteResGroup10], 
(instregex "LAHF")>; -def: InstRW<[HWWriteResGroup10], (instregex "MOV32rr")>; -def: InstRW<[HWWriteResGroup10], (instregex "MOV8ri")>; -def: InstRW<[HWWriteResGroup10], (instregex "MOV8rr")>; -def: InstRW<[HWWriteResGroup10], (instregex "MOVSX32rr16")>; -def: InstRW<[HWWriteResGroup10], (instregex "MOVSX32rr8")>; -def: InstRW<[HWWriteResGroup10], (instregex "MOVZX32rr16")>; -def: InstRW<[HWWriteResGroup10], (instregex "MOVZX32rr8")>; -def: InstRW<[HWWriteResGroup10], (instregex "NEG64r")>; -def: InstRW<[HWWriteResGroup10], (instregex "NEG8r")>; -def: InstRW<[HWWriteResGroup10], (instregex "NOOP")>; -def: InstRW<[HWWriteResGroup10], (instregex "NOT64r")>; -def: InstRW<[HWWriteResGroup10], (instregex "NOT8r")>; -def: InstRW<[HWWriteResGroup10], (instregex "OR64ri8")>; -def: InstRW<[HWWriteResGroup10], (instregex "OR64rr")>; -def: InstRW<[HWWriteResGroup10], (instregex "OR8ri")>; -def: InstRW<[HWWriteResGroup10], (instregex "OR8rr")>; -def: InstRW<[HWWriteResGroup10], (instregex "SAHF")>; -def: InstRW<[HWWriteResGroup10], (instregex "SGDT64m")>; -def: InstRW<[HWWriteResGroup10], (instregex "SIDT64m")>; -def: InstRW<[HWWriteResGroup10], (instregex "SLDT16m")>; -def: InstRW<[HWWriteResGroup10], (instregex "SMSW16m")>; -def: InstRW<[HWWriteResGroup10], (instregex "STC")>; -def: InstRW<[HWWriteResGroup10], (instregex "STRm")>; -def: InstRW<[HWWriteResGroup10], (instregex "SUB64ri8")>; -def: InstRW<[HWWriteResGroup10], (instregex "SUB64rr")>; -def: InstRW<[HWWriteResGroup10], (instregex "SUB8ri")>; -def: InstRW<[HWWriteResGroup10], (instregex "SUB8rr")>; -def: InstRW<[HWWriteResGroup10], (instregex "SYSCALL")>; -def: InstRW<[HWWriteResGroup10], (instregex "TEST64rr")>; -def: InstRW<[HWWriteResGroup10], (instregex "TEST8ri")>; -def: InstRW<[HWWriteResGroup10], (instregex "TEST8rr")>; -def: InstRW<[HWWriteResGroup10], (instregex "XCHG64rr")>; -def: InstRW<[HWWriteResGroup10], (instregex "XOR32rr")>; -def: InstRW<[HWWriteResGroup10], (instregex "XOR64ri8")>; -def: 
InstRW<[HWWriteResGroup10], (instregex "XOR8ri")>; -def: InstRW<[HWWriteResGroup10], (instregex "XOR8rr")>; - -def HWWriteResGroup11 : SchedWriteRes<[HWPort0,HWPort23]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup11], (instregex "CVTPS2PDrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "CVTSS2SDrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "MMX_PSLLDrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "MMX_PSLLQrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "MMX_PSLLWrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "MMX_PSRADrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "MMX_PSRAWrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "MMX_PSRLDrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "MMX_PSRLQrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "MMX_PSRLWrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VCVTPH2PSYrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VCVTPH2PSrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VCVTPS2PDrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VCVTSS2SDrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VPSLLDYri")>; -def: InstRW<[HWWriteResGroup11], (instregex "VPSLLQYri")>; -def: InstRW<[HWWriteResGroup11], (instregex "VPSLLVQYrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VPSLLVQrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VPSLLWYri")>; -def: InstRW<[HWWriteResGroup11], (instregex "VPSRADYrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VPSRAWYrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VPSRLDYrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VPSRLQYrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VPSRLVQYrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VPSRLVQrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VPSRLWYrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VTESTPDYrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VTESTPDrm")>; -def: InstRW<[HWWriteResGroup11], 
(instregex "VTESTPSYrm")>; -def: InstRW<[HWWriteResGroup11], (instregex "VTESTPSrm")>; - -def HWWriteResGroup12 : SchedWriteRes<[HWPort5,HWPort23]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup12], (instregex "ANDNPDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "ANDNPSrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "ANDPDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "ANDPSrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "INSERTPSrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "MMX_PALIGNR64irm")>; -def: InstRW<[HWWriteResGroup12], (instregex "MMX_PINSRWirmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "MMX_PSHUFBrm64")>; -def: InstRW<[HWWriteResGroup12], (instregex "MMX_PSHUFWmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "MMX_PUNPCKHBWirm")>; -def: InstRW<[HWWriteResGroup12], (instregex "MMX_PUNPCKHDQirm")>; -def: InstRW<[HWWriteResGroup12], (instregex "MMX_PUNPCKHWDirm")>; -def: InstRW<[HWWriteResGroup12], (instregex "MMX_PUNPCKLBWirm")>; -def: InstRW<[HWWriteResGroup12], (instregex "MMX_PUNPCKLDQirm")>; -def: InstRW<[HWWriteResGroup12], (instregex "MMX_PUNPCKLWDirm")>; -def: InstRW<[HWWriteResGroup12], (instregex "MOVHPDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "MOVHPSrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "MOVLPDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "MOVLPSrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "ORPDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "ORPSrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PACKSSDWrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PACKSSWBrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PACKUSDWrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PACKUSWBrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PALIGNRrmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "PBLENDWrmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "PINSRBrm")>; -def: InstRW<[HWWriteResGroup12], 
(instregex "PINSRDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PINSRQrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PINSRWrmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "PMOVSXBDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PMOVSXBQrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PMOVSXBWrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PMOVSXDQrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PMOVSXWDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PMOVSXWQrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PMOVZXBDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PMOVZXBQrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PMOVZXBWrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PMOVZXDQrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PMOVZXWDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PMOVZXWQrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PSHUFBrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PSHUFDmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "PSHUFHWmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "PSHUFLWmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "PUNPCKHBWrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PUNPCKHDQrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PUNPCKHQDQrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PUNPCKHWDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PUNPCKLBWrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PUNPCKLDQrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PUNPCKLQDQrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "PUNPCKLWDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "SHUFPDrmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "SHUFPSrmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "UNPCKHPDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "UNPCKHPSrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "UNPCKLPDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "UNPCKLPSrm")>; -def: 
InstRW<[HWWriteResGroup12], (instregex "VANDNPDYrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VANDNPDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VANDNPSYrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VANDNPSrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VANDPDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VANDPDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VANDPSrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VANDPSrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VINSERTPSrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VMOVHPDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VMOVHPSrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VMOVLPDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VMOVLPSrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VORPDYrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VORPDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VORPSYrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VORPSrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPACKSSDWYrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPACKSSDWrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPACKSSWBYrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPACKSSWBrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPACKUSDWYrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPACKUSDWrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPACKUSWBYrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPACKUSWBrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPALIGNRYrmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPALIGNRrmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPBLENDWYrmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPBLENDWrmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPERMILPDYri")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPERMILPDmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPERMILPDmi")>; -def: InstRW<[HWWriteResGroup12], (instregex 
"VPERMILPDri")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPERMILPSYri")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPERMILPSmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPERMILPSmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPERMILPSri")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPINSRBrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPINSRDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPINSRQrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPINSRWrmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPMOVSXBDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPMOVSXBQrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPMOVSXBWrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPMOVSXDQrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPMOVSXWDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPMOVSXWQrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPMOVZXBDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPMOVZXBQrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPMOVZXBWrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPMOVZXDQrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPMOVZXWDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPMOVZXWQrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPSHUFBrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPSHUFBrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPSHUFDYmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPSHUFDmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPSHUFHWmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPSHUFHWmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPSHUFLWYmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPSHUFLWmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKHBWYrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKHBWrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKHDQYrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKHDQrm")>; -def: 
InstRW<[HWWriteResGroup12], (instregex "VPUNPCKHQDQrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKHQDQrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKHWDYrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKHWDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKLBWrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKLBWrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKLDQYrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKLDQrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKLQDQYrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKLQDQrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKLWDYrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VPUNPCKLWDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VSHUFPDYrmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "VSHUFPDrmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "VSHUFPSYrmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "VSHUFPSrmi")>; -def: InstRW<[HWWriteResGroup12], (instregex "VUNPCKHPDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VUNPCKHPDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VUNPCKHPSrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VUNPCKHPSrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VUNPCKLPDYrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VUNPCKLPDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VUNPCKLPSYrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VUNPCKLPSrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VXORPDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VXORPDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VXORPSrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "VXORPSrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "XORPDrm")>; -def: InstRW<[HWWriteResGroup12], (instregex "XORPSrm")>; - -def HWWriteResGroup13 : SchedWriteRes<[HWPort6,HWPort23]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; +def 
Write2P0156_Lat2Ld : SchedWriteRes<[HWPort0156, HWPort23]> { + let Latency = 6; + let ResourceCycles = [2, 1]; } -def: InstRW<[HWWriteResGroup13], (instregex "FARJMP64")>; -def: InstRW<[HWWriteResGroup13], (instregex "JMP64m")>; -def HWWriteResGroup14 : SchedWriteRes<[HWPort23,HWPort0]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup14], (instregex "BT64mi8")>; -def: InstRW<[HWWriteResGroup14], (instregex "RORX32mi")>; -def: InstRW<[HWWriteResGroup14], (instregex "RORX64mi")>; -def: InstRW<[HWWriteResGroup14], (instregex "SARX32rm")>; -def: InstRW<[HWWriteResGroup14], (instregex "SARX64rm")>; -def: InstRW<[HWWriteResGroup14], (instregex "SHLX32rm")>; -def: InstRW<[HWWriteResGroup14], (instregex "SHLX64rm")>; -def: InstRW<[HWWriteResGroup14], (instregex "SHRX32rm")>; -def: InstRW<[HWWriteResGroup14], (instregex "SHRX64rm")>; - -def HWWriteResGroup15 : SchedWriteRes<[HWPort23,HWPort15]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup15], (instregex "ANDN32rm")>; -def: InstRW<[HWWriteResGroup15], (instregex "ANDN64rm")>; -def: InstRW<[HWWriteResGroup15], (instregex "BLSI32rm")>; -def: InstRW<[HWWriteResGroup15], (instregex "BLSI64rm")>; -def: InstRW<[HWWriteResGroup15], (instregex "BLSMSK32rm")>; -def: InstRW<[HWWriteResGroup15], (instregex "BLSMSK64rm")>; -def: InstRW<[HWWriteResGroup15], (instregex "BLSR32rm")>; -def: InstRW<[HWWriteResGroup15], (instregex "BLSR64rm")>; -def: InstRW<[HWWriteResGroup15], (instregex "BZHI32rm")>; -def: InstRW<[HWWriteResGroup15], (instregex "BZHI64rm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PABSBrm64")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PABSDrm64")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PABSWrm64")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PADDBirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PADDDirm")>; -def: InstRW<[HWWriteResGroup15], (instregex 
"MMX_PADDQirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PADDSBirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PADDSWirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PADDUSBirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PADDUSWirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PADDWirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PAVGBirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PAVGWirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PCMPEQBirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PCMPEQDirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PCMPEQWirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PCMPGTBirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PCMPGTDirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PCMPGTWirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PMAXSWirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PMAXUBirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PMINSWirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PMINUBirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PSIGNBrm64")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PSIGNDrm64")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PSIGNWrm64")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PSUBBirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PSUBDirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PSUBQirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PSUBSBirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PSUBSWirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PSUBUSBirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PSUBUSWirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MMX_PSUBWirm")>; -def: InstRW<[HWWriteResGroup15], (instregex "MOVBE64rm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PABSBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PABSDrm")>; -def: 
InstRW<[HWWriteResGroup15], (instregex "PABSWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PADDBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PADDDrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PADDQrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PADDSBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PADDSWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PADDUSBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PADDUSWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PADDWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PAVGBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PAVGWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PCMPEQBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PCMPEQDrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PCMPEQQrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PCMPEQWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PCMPGTBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PCMPGTDrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PCMPGTWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PMAXSBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PMAXSDrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PMAXSWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PMAXUBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PMAXUDrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PMAXUWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PMINSBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PMINSDrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PMINSWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PMINUBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PMINUDrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PMINUWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PSIGNBrm128")>; -def: InstRW<[HWWriteResGroup15], (instregex "PSIGNDrm128")>; -def: InstRW<[HWWriteResGroup15], (instregex "PSIGNWrm128")>; -def: InstRW<[HWWriteResGroup15], (instregex 
"PSUBBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PSUBDrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PSUBQrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PSUBSBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PSUBSWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PSUBUSBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PSUBUSWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "PSUBWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPABSBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPABSBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPABSDYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPABSDrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPABSWYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPABSWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPADDBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPADDBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPADDDrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPADDDrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPADDQYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPADDQrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPADDSBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPADDSBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPADDSWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPADDSWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPADDUSBYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPADDUSBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPADDUSWYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPADDUSWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPADDWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPADDWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPAVGBYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPAVGBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPAVGWYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPAVGWrm")>; -def: 
InstRW<[HWWriteResGroup15], (instregex "VPCMPEQBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPCMPEQBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPCMPEQDrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPCMPEQDrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPCMPEQQrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPCMPEQQrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPCMPEQWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPCMPEQWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPCMPGTBYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPCMPGTBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPCMPGTDYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPCMPGTDrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPCMPGTWYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPCMPGTWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPMAXSBYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPMAXSBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPMAXSDYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPMAXSDrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPMAXSWYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPMAXSWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPMAXUBYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPMAXUBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPMAXUDYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPMAXUDrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPMAXUWYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPMAXUWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPMINSBYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPMINSBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPMINSDYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPMINSDrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPMINSWYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPMINSWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex 
"VPMINUBYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPMINUBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPMINUDYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPMINUDrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPMINUWYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPMINUWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPSIGNBYrm256")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPSIGNBrm128")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPSIGNDYrm256")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPSIGNDrm128")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPSIGNWYrm256")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPSIGNWrm128")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPSUBBYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPSUBBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPSUBDYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPSUBDrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPSUBQYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPSUBQrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPSUBSBYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPSUBSBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPSUBSWYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPSUBSWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPSUBUSBYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPSUBUSBrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPSUBUSWYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPSUBUSWrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPSUBWYrm")>; -def: InstRW<[HWWriteResGroup15], (instregex "VPSUBWrm")>; - -def HWWriteResGroup16 : SchedWriteRes<[HWPort23,HWPort015]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup16], (instregex "BLENDPDrmi")>; -def: InstRW<[HWWriteResGroup16], (instregex "BLENDPSrmi")>; -def: InstRW<[HWWriteResGroup16], (instregex "MMX_PANDNirm")>; -def: 
InstRW<[HWWriteResGroup16], (instregex "MMX_PANDirm")>; -def: InstRW<[HWWriteResGroup16], (instregex "MMX_PORirm")>; -def: InstRW<[HWWriteResGroup16], (instregex "MMX_PXORirm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PANDNrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PANDrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PORrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "PXORrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VBLENDPDYrmi")>; -def: InstRW<[HWWriteResGroup16], (instregex "VBLENDPDrmi")>; -def: InstRW<[HWWriteResGroup16], (instregex "VBLENDPSYrmi")>; -def: InstRW<[HWWriteResGroup16], (instregex "VBLENDPSrmi")>; -def: InstRW<[HWWriteResGroup16], (instregex "VINSERTF128rm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VINSERTI128rm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPANDNYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPANDNrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPANDYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPANDrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPBLENDDYrmi")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPBLENDDrmi")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPORYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPORrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPXORYrm")>; -def: InstRW<[HWWriteResGroup16], (instregex "VPXORrm")>; - -def HWWriteResGroup17 : SchedWriteRes<[HWPort23,HWPort0156]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup17], (instregex "ADD64rm")>; -def: InstRW<[HWWriteResGroup17], (instregex "ADD8rm")>; -def: InstRW<[HWWriteResGroup17], (instregex "AND64rm")>; -def: InstRW<[HWWriteResGroup17], (instregex "AND8rm")>; -def: InstRW<[HWWriteResGroup17], (instregex "CMP64mi8")>; -def: InstRW<[HWWriteResGroup17], (instregex "CMP64mr")>; -def: InstRW<[HWWriteResGroup17], (instregex "CMP64rm")>; -def: InstRW<[HWWriteResGroup17], (instregex "CMP8mi")>; -def: 
InstRW<[HWWriteResGroup17], (instregex "CMP8mr")>; -def: InstRW<[HWWriteResGroup17], (instregex "CMP8rm")>; -def: InstRW<[HWWriteResGroup17], (instregex "OR64rm")>; -def: InstRW<[HWWriteResGroup17], (instregex "OR8rm")>; -def: InstRW<[HWWriteResGroup17], (instregex "POP64r")>; -def: InstRW<[HWWriteResGroup17], (instregex "SUB64rm")>; -def: InstRW<[HWWriteResGroup17], (instregex "SUB8rm")>; -def: InstRW<[HWWriteResGroup17], (instregex "TEST64rm")>; -def: InstRW<[HWWriteResGroup17], (instregex "TEST8mi")>; -def: InstRW<[HWWriteResGroup17], (instregex "TEST8rm")>; -def: InstRW<[HWWriteResGroup17], (instregex "XOR64rm")>; -def: InstRW<[HWWriteResGroup17], (instregex "XOR8rm")>; - -def HWWriteResGroup18 : SchedWriteRes<[HWPort237,HWPort0156]> { - let Latency = 1; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; +def Write5P0156 : SchedWriteRes<[HWPort0156]> { + let NumMicroOps = 5; + let ResourceCycles = [5]; } -def: InstRW<[HWWriteResGroup18], (instregex "SFENCE")>; -def HWWriteResGroup19 : SchedWriteRes<[HWPort4,HWPort5,HWPort237]> { - let Latency = 1; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[HWWriteResGroup19], (instregex "EXTRACTPSmr")>; -def: InstRW<[HWWriteResGroup19], (instregex "PEXTRBmr")>; -def: InstRW<[HWWriteResGroup19], (instregex "PEXTRDmr")>; -def: InstRW<[HWWriteResGroup19], (instregex "PEXTRQmr")>; -def: InstRW<[HWWriteResGroup19], (instregex "PEXTRWmr")>; -def: InstRW<[HWWriteResGroup19], (instregex "STMXCSR")>; -def: InstRW<[HWWriteResGroup19], (instregex "VEXTRACTPSmr")>; -def: InstRW<[HWWriteResGroup19], (instregex "VPEXTRBmr")>; -def: InstRW<[HWWriteResGroup19], (instregex "VPEXTRDmr")>; -def: InstRW<[HWWriteResGroup19], (instregex "VPEXTRQmr")>; -def: InstRW<[HWWriteResGroup19], (instregex "VPEXTRWmr")>; -def: InstRW<[HWWriteResGroup19], (instregex "VSTMXCSR")>; - -def HWWriteResGroup20 : SchedWriteRes<[HWPort4,HWPort6,HWPort237]> { +def WriteP0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> { let 
Latency = 1; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; + let ResourceCycles = [1, 2, 1]; } -def: InstRW<[HWWriteResGroup20], (instregex "FNSTCW16m")>; -def HWWriteResGroup21 : SchedWriteRes<[HWPort4,HWPort237,HWPort0]> { - let Latency = 1; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[HWWriteResGroup21], (instregex "SETAEm")>; -def: InstRW<[HWWriteResGroup21], (instregex "SETBm")>; -def: InstRW<[HWWriteResGroup21], (instregex "SETEm")>; -def: InstRW<[HWWriteResGroup21], (instregex "SETGEm")>; -def: InstRW<[HWWriteResGroup21], (instregex "SETGm")>; -def: InstRW<[HWWriteResGroup21], (instregex "SETLEm")>; -def: InstRW<[HWWriteResGroup21], (instregex "SETLm")>; -def: InstRW<[HWWriteResGroup21], (instregex "SETNEm")>; -def: InstRW<[HWWriteResGroup21], (instregex "SETNOm")>; -def: InstRW<[HWWriteResGroup21], (instregex "SETNPm")>; -def: InstRW<[HWWriteResGroup21], (instregex "SETNSm")>; -def: InstRW<[HWWriteResGroup21], (instregex "SETOm")>; -def: InstRW<[HWWriteResGroup21], (instregex "SETPm")>; -def: InstRW<[HWWriteResGroup21], (instregex "SETSm")>; - -def HWWriteResGroup22 : SchedWriteRes<[HWPort4,HWPort237,HWPort15]> { +def Write2P0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> { let Latency = 1; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; + let ResourceCycles = [2, 2, 1]; } -def: InstRW<[HWWriteResGroup22], (instregex "MOVBE64mr")>; -def HWWriteResGroup23 : SchedWriteRes<[HWPort4,HWPort237,HWPort0156]> { +def Write3P0156_2P237_P4 : SchedWriteRes<[HWPort0156, HWPort237, HWPort4]> { let Latency = 1; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; + let ResourceCycles = [3, 2, 1]; } -def: InstRW<[HWWriteResGroup23], (instregex "PUSH64i8")>; -def: InstRW<[HWWriteResGroup23], (instregex "PUSH64r")>; -def: InstRW<[HWWriteResGroup23], (instregex "STOSB")>; -def: InstRW<[HWWriteResGroup23], (instregex "STOSL")>; -def: InstRW<[HWWriteResGroup23], (instregex "STOSQ")>; -def: InstRW<[HWWriteResGroup23], 
(instregex "STOSW")>; -def HWWriteResGroup24 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0]> { - let Latency = 1; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[HWWriteResGroup24], (instregex "BTC64mi8")>; -def: InstRW<[HWWriteResGroup24], (instregex "BTR64mi8")>; -def: InstRW<[HWWriteResGroup24], (instregex "BTS64mi8")>; -def: InstRW<[HWWriteResGroup24], (instregex "SAR64m1")>; -def: InstRW<[HWWriteResGroup24], (instregex "SAR64mi")>; -def: InstRW<[HWWriteResGroup24], (instregex "SAR8m1")>; -def: InstRW<[HWWriteResGroup24], (instregex "SAR8mi")>; -def: InstRW<[HWWriteResGroup24], (instregex "SHL64m1")>; -def: InstRW<[HWWriteResGroup24], (instregex "SHL64mi")>; -def: InstRW<[HWWriteResGroup24], (instregex "SHL8m1")>; -def: InstRW<[HWWriteResGroup24], (instregex "SHL8mi")>; -def: InstRW<[HWWriteResGroup24], (instregex "SHR64m1")>; -def: InstRW<[HWWriteResGroup24], (instregex "SHR64mi")>; -def: InstRW<[HWWriteResGroup24], (instregex "SHR8m1")>; -def: InstRW<[HWWriteResGroup24], (instregex "SHR8mi")>; - -def HWWriteResGroup25 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> { - let Latency = 1; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[HWWriteResGroup25], (instregex "ADD64mi8")>; -def: InstRW<[HWWriteResGroup25], (instregex "ADD64mr")>; -def: InstRW<[HWWriteResGroup25], (instregex "ADD8mi")>; -def: InstRW<[HWWriteResGroup25], (instregex "ADD8mr")>; -def: InstRW<[HWWriteResGroup25], (instregex "AND64mi8")>; -def: InstRW<[HWWriteResGroup25], (instregex "AND64mr")>; -def: InstRW<[HWWriteResGroup25], (instregex "AND8mi")>; -def: InstRW<[HWWriteResGroup25], (instregex "AND8mr")>; -def: InstRW<[HWWriteResGroup25], (instregex "DEC64m")>; -def: InstRW<[HWWriteResGroup25], (instregex "DEC8m")>; -def: InstRW<[HWWriteResGroup25], (instregex "INC64m")>; -def: InstRW<[HWWriteResGroup25], (instregex "INC8m")>; -def: InstRW<[HWWriteResGroup25], (instregex "NEG64m")>; -def: InstRW<[HWWriteResGroup25], (instregex 
"NEG8m")>; -def: InstRW<[HWWriteResGroup25], (instregex "NOT64m")>; -def: InstRW<[HWWriteResGroup25], (instregex "NOT8m")>; -def: InstRW<[HWWriteResGroup25], (instregex "OR64mi8")>; -def: InstRW<[HWWriteResGroup25], (instregex "OR64mr")>; -def: InstRW<[HWWriteResGroup25], (instregex "OR8mi")>; -def: InstRW<[HWWriteResGroup25], (instregex "OR8mr")>; -def: InstRW<[HWWriteResGroup25], (instregex "POP64rmm")>; -def: InstRW<[HWWriteResGroup25], (instregex "PUSH64rmm")>; -def: InstRW<[HWWriteResGroup25], (instregex "SUB64mi8")>; -def: InstRW<[HWWriteResGroup25], (instregex "SUB64mr")>; -def: InstRW<[HWWriteResGroup25], (instregex "SUB8mi")>; -def: InstRW<[HWWriteResGroup25], (instregex "SUB8mr")>; -def: InstRW<[HWWriteResGroup25], (instregex "XOR64mi8")>; -def: InstRW<[HWWriteResGroup25], (instregex "XOR64mr")>; -def: InstRW<[HWWriteResGroup25], (instregex "XOR8mi")>; -def: InstRW<[HWWriteResGroup25], (instregex "XOR8mr")>; - -def HWWriteResGroup26 : SchedWriteRes<[HWPort5]> { - let Latency = 2; +// Starting with P1. 
+def WriteP1 : SchedWriteRes<[HWPort1]>; + +def WriteP1_P23 : SchedWriteRes<[HWPort1, HWPort23]> { let NumMicroOps = 2; - let ResourceCycles = [2]; } -def: InstRW<[HWWriteResGroup26], (instregex "BLENDVPDrr0")>; -def: InstRW<[HWWriteResGroup26], (instregex "BLENDVPSrr0")>; -def: InstRW<[HWWriteResGroup26], (instregex "MMX_PINSRWirri")>; -def: InstRW<[HWWriteResGroup26], (instregex "PBLENDVBrr0")>; -def: InstRW<[HWWriteResGroup26], (instregex "PINSRBrr")>; -def: InstRW<[HWWriteResGroup26], (instregex "PINSRDrr")>; -def: InstRW<[HWWriteResGroup26], (instregex "PINSRQrr")>; -def: InstRW<[HWWriteResGroup26], (instregex "PINSRWrri")>; -def: InstRW<[HWWriteResGroup26], (instregex "VBLENDVPDYrr")>; -def: InstRW<[HWWriteResGroup26], (instregex "VBLENDVPDrr")>; -def: InstRW<[HWWriteResGroup26], (instregex "VBLENDVPSYrr")>; -def: InstRW<[HWWriteResGroup26], (instregex "VBLENDVPSrr")>; -def: InstRW<[HWWriteResGroup26], (instregex "VPBLENDVBYrr")>; -def: InstRW<[HWWriteResGroup26], (instregex "VPBLENDVBrr")>; -def: InstRW<[HWWriteResGroup26], (instregex "VPINSRBrr")>; -def: InstRW<[HWWriteResGroup26], (instregex "VPINSRDrr")>; -def: InstRW<[HWWriteResGroup26], (instregex "VPINSRQrr")>; -def: InstRW<[HWWriteResGroup26], (instregex "VPINSRWrri")>; - -def HWWriteResGroup27 : SchedWriteRes<[HWPort01]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [2]; +def WriteP1_Lat3 : SchedWriteRes<[HWPort1]> { + let Latency = 3; } -def: InstRW<[HWWriteResGroup27], (instregex "FDECSTP")>; - -def HWWriteResGroup28 : SchedWriteRes<[HWPort0]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [2]; +def WriteP1_Lat3Ld : SchedWriteRes<[HWPort1, HWPort23]> { + let Latency = 7; } -def: InstRW<[HWWriteResGroup28], (instregex "ROL32ri")>; -def: InstRW<[HWWriteResGroup28], (instregex "ROL64r1")>; -def: InstRW<[HWWriteResGroup28], (instregex "ROL8r1")>; -def: InstRW<[HWWriteResGroup28], (instregex "ROL8ri")>; -def: InstRW<[HWWriteResGroup28], (instregex "ROR32ri")>; 
-def: InstRW<[HWWriteResGroup28], (instregex "ROR64r1")>; -def: InstRW<[HWWriteResGroup28], (instregex "ROR8r1")>; -def: InstRW<[HWWriteResGroup28], (instregex "ROR8ri")>; -def HWWriteResGroup29 : SchedWriteRes<[HWPort0156]> { - let Latency = 2; +def Write2P1 : SchedWriteRes<[HWPort1]> { let NumMicroOps = 2; let ResourceCycles = [2]; } -def: InstRW<[HWWriteResGroup29], (instregex "LFENCE")>; -def: InstRW<[HWWriteResGroup29], (instregex "MFENCE")>; -def: InstRW<[HWWriteResGroup29], (instregex "WAIT")>; -def: InstRW<[HWWriteResGroup29], (instregex "XGETBV")>; +def Write2P1_P23 : SchedWriteRes<[HWPort1, HWPort23]> { + let NumMicroOps = 3; + let ResourceCycles = [2, 1]; +} +def WriteP15 : SchedWriteRes<[HWPort15]>; +def WriteP15Ld : SchedWriteRes<[HWPort15, HWPort23]> { + let Latency = 4; +} -def HWWriteResGroup30 : SchedWriteRes<[HWPort0,HWPort5]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup30], (instregex "CVTPS2PDrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "CVTSS2SDrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "EXTRACTPSrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "MMX_PEXTRWirri")>; -def: InstRW<[HWWriteResGroup30], (instregex "PEXTRBrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "PEXTRDrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "PEXTRQrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "PEXTRWri")>; -def: InstRW<[HWWriteResGroup30], (instregex "PSLLDrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "PSLLQrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "PSLLWrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "PSRADrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "PSRAWrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "PSRLDrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "PSRLQrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "PSRLWrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "PTESTrr")>; -def: InstRW<[HWWriteResGroup30], (instregex 
"VCVTPH2PSYrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "VCVTPH2PSrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "VCVTPS2PDrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "VCVTSS2SDrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "VEXTRACTPSrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "VPEXTRBrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "VPEXTRDrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "VPEXTRQrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "VPEXTRWri")>; -def: InstRW<[HWWriteResGroup30], (instregex "VPSRADrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "VPSRAWrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "VPSRLDrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "VPSRLQrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "VPSRLWrr")>; -def: InstRW<[HWWriteResGroup30], (instregex "VPTESTrr")>; - -def HWWriteResGroup31 : SchedWriteRes<[HWPort6,HWPort0156]> { - let Latency = 2; +def WriteP1_P5_Lat4 : SchedWriteRes<[HWPort1, HWPort5]> { + let Latency = 4; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1, 1]; } -def: InstRW<[HWWriteResGroup31], (instregex "CLFLUSH")>; -def HWWriteResGroup32 : SchedWriteRes<[HWPort01,HWPort015]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; +def WriteP1_P5_Lat4Ld : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> { + let Latency = 8; + let NumMicroOps = 3; + let ResourceCycles = [1, 1, 1]; } -def: InstRW<[HWWriteResGroup32], (instregex "MMX_MOVDQ2Qrr")>; -def HWWriteResGroup33 : SchedWriteRes<[HWPort0,HWPort15]> { - let Latency = 2; +def WriteP1_P5_Lat6 : SchedWriteRes<[HWPort1, HWPort5]> { + let Latency = 6; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1, 1]; } -def: InstRW<[HWWriteResGroup33], (instregex "BEXTR32rr")>; -def: InstRW<[HWWriteResGroup33], (instregex "BEXTR64rr")>; -def: InstRW<[HWWriteResGroup33], (instregex "BSWAP32r")>; -def HWWriteResGroup34 : SchedWriteRes<[HWPort0,HWPort0156]> { - 
let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup34], (instregex "ADC64ri8")>; -def: InstRW<[HWWriteResGroup34], (instregex "ADC64rr")>; -def: InstRW<[HWWriteResGroup34], (instregex "ADC8ri")>; -def: InstRW<[HWWriteResGroup34], (instregex "ADC8rr")>; -def: InstRW<[HWWriteResGroup34], (instregex "CMOVAE32rr")>; -def: InstRW<[HWWriteResGroup34], (instregex "CMOVB32rr")>; -def: InstRW<[HWWriteResGroup34], (instregex "CMOVE32rr")>; -def: InstRW<[HWWriteResGroup34], (instregex "CMOVG32rr")>; -def: InstRW<[HWWriteResGroup34], (instregex "CMOVGE32rr")>; -def: InstRW<[HWWriteResGroup34], (instregex "CMOVL32rr")>; -def: InstRW<[HWWriteResGroup34], (instregex "CMOVLE32rr")>; -def: InstRW<[HWWriteResGroup34], (instregex "CMOVNE32rr")>; -def: InstRW<[HWWriteResGroup34], (instregex "CMOVNO32rr")>; -def: InstRW<[HWWriteResGroup34], (instregex "CMOVNP32rr")>; -def: InstRW<[HWWriteResGroup34], (instregex "CMOVNS32rr")>; -def: InstRW<[HWWriteResGroup34], (instregex "CMOVO32rr")>; -def: InstRW<[HWWriteResGroup34], (instregex "CMOVP32rr")>; -def: InstRW<[HWWriteResGroup34], (instregex "CMOVS32rr")>; -def: InstRW<[HWWriteResGroup34], (instregex "CWD")>; -def: InstRW<[HWWriteResGroup34], (instregex "SBB32rr")>; -def: InstRW<[HWWriteResGroup34], (instregex "SBB64ri8")>; -def: InstRW<[HWWriteResGroup34], (instregex "SBB8ri")>; -def: InstRW<[HWWriteResGroup34], (instregex "SBB8rr")>; -def: InstRW<[HWWriteResGroup34], (instregex "SETAr")>; -def: InstRW<[HWWriteResGroup34], (instregex "SETBEr")>; - -def HWWriteResGroup35 : SchedWriteRes<[HWPort5,HWPort23]> { - let Latency = 2; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[HWWriteResGroup35], (instregex "BLENDVPDrm0")>; -def: InstRW<[HWWriteResGroup35], (instregex "BLENDVPSrm0")>; -def: InstRW<[HWWriteResGroup35], (instregex "MMX_PACKSSDWirm")>; -def: InstRW<[HWWriteResGroup35], (instregex "MMX_PACKSSWBirm")>; -def: InstRW<[HWWriteResGroup35], (instregex 
"MMX_PACKUSWBirm")>; -def: InstRW<[HWWriteResGroup35], (instregex "PBLENDVBrm0")>; -def: InstRW<[HWWriteResGroup35], (instregex "VBLENDVPDYrm")>; -def: InstRW<[HWWriteResGroup35], (instregex "VBLENDVPDrm")>; -def: InstRW<[HWWriteResGroup35], (instregex "VBLENDVPSYrm")>; -def: InstRW<[HWWriteResGroup35], (instregex "VBLENDVPSrm")>; -def: InstRW<[HWWriteResGroup35], (instregex "VMASKMOVPDrm")>; -def: InstRW<[HWWriteResGroup35], (instregex "VMASKMOVPDrm")>; -def: InstRW<[HWWriteResGroup35], (instregex "VMASKMOVPSrm")>; -def: InstRW<[HWWriteResGroup35], (instregex "VMASKMOVPSrm")>; -def: InstRW<[HWWriteResGroup35], (instregex "VPBLENDVBYrm")>; -def: InstRW<[HWWriteResGroup35], (instregex "VPBLENDVBrm")>; -def: InstRW<[HWWriteResGroup35], (instregex "VPMASKMOVDYrm")>; -def: InstRW<[HWWriteResGroup35], (instregex "VPMASKMOVDrm")>; -def: InstRW<[HWWriteResGroup35], (instregex "VPMASKMOVQYrm")>; -def: InstRW<[HWWriteResGroup35], (instregex "VPMASKMOVQrm")>; - -def HWWriteResGroup36 : SchedWriteRes<[HWPort23,HWPort0156]> { - let Latency = 2; +def WriteP1_P5_Lat6Ld : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> { + let Latency = 10; let NumMicroOps = 3; - let ResourceCycles = [1,2]; + let ResourceCycles = [1, 1, 1]; } -def: InstRW<[HWWriteResGroup36], (instregex "LEAVE64")>; -def: InstRW<[HWWriteResGroup36], (instregex "SCASB")>; -def: InstRW<[HWWriteResGroup36], (instregex "SCASL")>; -def: InstRW<[HWWriteResGroup36], (instregex "SCASQ")>; -def: InstRW<[HWWriteResGroup36], (instregex "SCASW")>; -def HWWriteResGroup37 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> { - let Latency = 2; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[HWWriteResGroup37], (instregex "PSLLDrm")>; -def: InstRW<[HWWriteResGroup37], (instregex "PSLLQrm")>; -def: InstRW<[HWWriteResGroup37], (instregex "PSLLWrm")>; -def: InstRW<[HWWriteResGroup37], (instregex "PSRADrm")>; -def: InstRW<[HWWriteResGroup37], (instregex "PSRAWrm")>; -def: InstRW<[HWWriteResGroup37], (instregex 
"PSRLDrm")>; -def: InstRW<[HWWriteResGroup37], (instregex "PSRLQrm")>; -def: InstRW<[HWWriteResGroup37], (instregex "PSRLWrm")>; -def: InstRW<[HWWriteResGroup37], (instregex "PTESTrm")>; -def: InstRW<[HWWriteResGroup37], (instregex "VPSLLDri")>; -def: InstRW<[HWWriteResGroup37], (instregex "VPSLLQri")>; -def: InstRW<[HWWriteResGroup37], (instregex "VPSLLWri")>; -def: InstRW<[HWWriteResGroup37], (instregex "VPSRADrm")>; -def: InstRW<[HWWriteResGroup37], (instregex "VPSRAWrm")>; -def: InstRW<[HWWriteResGroup37], (instregex "VPSRLDrm")>; -def: InstRW<[HWWriteResGroup37], (instregex "VPSRLQrm")>; -def: InstRW<[HWWriteResGroup37], (instregex "VPSRLWrm")>; -def: InstRW<[HWWriteResGroup37], (instregex "VPTESTrm")>; - -def HWWriteResGroup38 : SchedWriteRes<[HWPort0,HWPort01,HWPort23]> { - let Latency = 2; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; +// Starting with P2. +def Write2P237_P4 : SchedWriteRes<[HWPort237, HWPort4]> { + let Latency = 1; + let ResourceCycles = [2, 1]; } -def: InstRW<[HWWriteResGroup38], (instregex "FLDCW16m")>; -def HWWriteResGroup39 : SchedWriteRes<[HWPort0,HWPort23,HWPort0156]> { - let Latency = 2; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; +// Starting with P5. +def WriteP5 : SchedWriteRes<[HWPort5]>; +def WriteP5Ld : SchedWriteRes<[HWPort5, HWPort23]> { + let Latency = 5; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; } -def: InstRW<[HWWriteResGroup39], (instregex "LDMXCSR")>; -def: InstRW<[HWWriteResGroup39], (instregex "VLDMXCSR")>; -def HWWriteResGroup40 : SchedWriteRes<[HWPort6,HWPort23,HWPort0156]> { +// Notation: +// - r: register. +// - mm: 64 bit mmx register. +// - x = 128 bit xmm register. +// - (x)mm = mmx or xmm register. +// - y = 256 bit ymm register. +// - v = any vector register. +// - m = memory. + +//=== Integer Instructions ===// +//-- Move instructions --// + +// MOV. +// r16,m. +def : InstRW<[WriteALULd], (instregex "MOV16rm")>; + +// MOVSX, MOVZX. +// r,m. 
+def : InstRW<[WriteLoad], (instregex "MOV(S|Z)X32rm(8|16)")>; + +// CMOVcc. +// r,r. +def : InstRW<[Write2P0156_Lat2], + (instregex "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rr")>; +// r,m. +def : InstRW<[Write2P0156_Lat2Ld, ReadAfterLd], + (instregex "CMOV(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)(16|32|64)rm")>; + +// XCHG. +// r,r. +def WriteXCHG : SchedWriteRes<[HWPort0156]> { let Latency = 2; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; + let ResourceCycles = [3]; } -def: InstRW<[HWWriteResGroup40], (instregex "LRETQ")>; -def: InstRW<[HWWriteResGroup40], (instregex "RETQ")>; -def HWWriteResGroup41 : SchedWriteRes<[HWPort23,HWPort0,HWPort15]> { - let Latency = 2; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; +def : InstRW<[WriteXCHG], (instregex "XCHG(8|16|32|64)rr", "XCHG(16|32|64)ar")>; + +// r,m. +def WriteXCHGrm : SchedWriteRes<[]> { + let Latency = 21; + let NumMicroOps = 8; } -def: InstRW<[HWWriteResGroup41], (instregex "BEXTR32rm")>; -def: InstRW<[HWWriteResGroup41], (instregex "BEXTR64rm")>; +def : InstRW<[WriteXCHGrm], (instregex "XCHG(8|16|32|64)rm")>; -def HWWriteResGroup42 : SchedWriteRes<[HWPort23,HWPort0,HWPort0156]> { - let Latency = 2; +// XLAT. 
+def WriteXLAT : SchedWriteRes<[]> { + let Latency = 7; let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[HWWriteResGroup42], (instregex "ADC64rm")>; -def: InstRW<[HWWriteResGroup42], (instregex "ADC8rm")>; -def: InstRW<[HWWriteResGroup42], (instregex "CMOVAE64rm")>; -def: InstRW<[HWWriteResGroup42], (instregex "CMOVB64rm")>; -def: InstRW<[HWWriteResGroup42], (instregex "CMOVE64rm")>; -def: InstRW<[HWWriteResGroup42], (instregex "CMOVG64rm")>; -def: InstRW<[HWWriteResGroup42], (instregex "CMOVGE64rm")>; -def: InstRW<[HWWriteResGroup42], (instregex "CMOVL64rm")>; -def: InstRW<[HWWriteResGroup42], (instregex "CMOVLE64rm")>; -def: InstRW<[HWWriteResGroup42], (instregex "CMOVNE64rm")>; -def: InstRW<[HWWriteResGroup42], (instregex "CMOVNO64rm")>; -def: InstRW<[HWWriteResGroup42], (instregex "CMOVNP64rm")>; -def: InstRW<[HWWriteResGroup42], (instregex "CMOVNS64rm")>; -def: InstRW<[HWWriteResGroup42], (instregex "CMOVO64rm")>; -def: InstRW<[HWWriteResGroup42], (instregex "CMOVP64rm")>; -def: InstRW<[HWWriteResGroup42], (instregex "CMOVS64rm")>; -def: InstRW<[HWWriteResGroup42], (instregex "SBB64rm")>; -def: InstRW<[HWWriteResGroup42], (instregex "SBB8rm")>; - -def HWWriteResGroup43 : SchedWriteRes<[HWPort4,HWPort6,HWPort237,HWPort0156]> { - let Latency = 2; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; } -def: InstRW<[HWWriteResGroup43], (instregex "CALL64r")>; -def: InstRW<[HWWriteResGroup43], (instregex "SETAm")>; -def: InstRW<[HWWriteResGroup43], (instregex "SETBEm")>; +def : InstRW<[WriteXLAT], (instregex "XLAT")>; -def HWWriteResGroup44 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0]> { - let Latency = 2; - let NumMicroOps = 5; - let ResourceCycles = [1,1,1,2]; -} -def: InstRW<[HWWriteResGroup44], (instregex "ROL64m1")>; -def: InstRW<[HWWriteResGroup44], (instregex "ROL64mi")>; -def: InstRW<[HWWriteResGroup44], (instregex "ROL8m1")>; -def: InstRW<[HWWriteResGroup44], (instregex "ROL8mi")>; -def: InstRW<[HWWriteResGroup44], 
(instregex "ROR64m1")>; -def: InstRW<[HWWriteResGroup44], (instregex "ROR64mi")>; -def: InstRW<[HWWriteResGroup44], (instregex "ROR8m1")>; -def: InstRW<[HWWriteResGroup44], (instregex "ROR8mi")>; - -def HWWriteResGroup45 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> { - let Latency = 2; - let NumMicroOps = 5; - let ResourceCycles = [1,1,1,2]; -} -def: InstRW<[HWWriteResGroup45], (instregex "XADD64rm")>; -def: InstRW<[HWWriteResGroup45], (instregex "XADD8rm")>; +// PUSH. +// m. +def : InstRW<[Write2P237_P4], (instregex "PUSH(16|32)rmm")>; -def HWWriteResGroup46 : SchedWriteRes<[HWPort4,HWPort6,HWPort23,HWPort237,HWPort0156]> { - let Latency = 2; - let NumMicroOps = 5; - let ResourceCycles = [1,1,1,1,1]; +// PUSHF. +def WritePushF : SchedWriteRes<[HWPort1, HWPort4, HWPort237, HWPort06]> { + let NumMicroOps = 4; } -def: InstRW<[HWWriteResGroup46], (instregex "CALL64m")>; -def: InstRW<[HWWriteResGroup46], (instregex "FARCALL64")>; +def : InstRW<[WritePushF], (instregex "PUSHF(16|32)")>; -def HWWriteResGroup47 : SchedWriteRes<[HWPort0]> { - let Latency = 3; - let NumMicroOps = 1; - let ResourceCycles = [1]; +// PUSHA. +def WritePushA : SchedWriteRes<[]> { + let NumMicroOps = 19; } -def: InstRW<[HWWriteResGroup47], (instregex "MOVMSKPDrr")>; -def: InstRW<[HWWriteResGroup47], (instregex "MOVMSKPSrr")>; -def: InstRW<[HWWriteResGroup47], (instregex "PMOVMSKBrr")>; -def: InstRW<[HWWriteResGroup47], (instregex "VMOVMSKPDYrr")>; -def: InstRW<[HWWriteResGroup47], (instregex "VMOVMSKPDrr")>; -def: InstRW<[HWWriteResGroup47], (instregex "VMOVMSKPSrr")>; -def: InstRW<[HWWriteResGroup47], (instregex "VPMOVMSKBYrr")>; -def: InstRW<[HWWriteResGroup47], (instregex "VPMOVMSKBrr")>; +def : InstRW<[WritePushA], (instregex "PUSHA(16|32)")>; -def HWWriteResGroup48 : SchedWriteRes<[HWPort1]> { - let Latency = 3; - let NumMicroOps = 1; - let ResourceCycles = [1]; +// POP. +// m. +def : InstRW<[Write2P237_P4], (instregex "POP(16|32)rmm")>; + +// POPF. 
+def WritePopF : SchedWriteRes<[]> { + let NumMicroOps = 9; } -def: InstRW<[HWWriteResGroup48], (instregex "ADDPDrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "ADDPSrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "ADDSDrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "ADDSSrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "ADDSUBPDrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "ADDSUBPSrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "BSF32rr")>; -def: InstRW<[HWWriteResGroup48], (instregex "BSR32rr")>; -def: InstRW<[HWWriteResGroup48], (instregex "CMPPDrri")>; -def: InstRW<[HWWriteResGroup48], (instregex "CMPPSrri")>; -def: InstRW<[HWWriteResGroup48], (instregex "CMPSDrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "CMPSSrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "COMISDrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "COMISSrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "CVTDQ2PSrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "CVTPS2DQrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "CVTTPS2DQrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "IMUL32rri8")>; -def: InstRW<[HWWriteResGroup48], (instregex "IMUL64rr")>; -def: InstRW<[HWWriteResGroup48], (instregex "IMUL8r")>; -def: InstRW<[HWWriteResGroup48], (instregex "LZCNT32rr")>; -def: InstRW<[HWWriteResGroup48], (instregex "MAXPDrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "MAXPSrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "MAXSDrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "MAXSSrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "MINPDrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "MINPSrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "MINSDrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "MINSSrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "MMX_CVTPI2PSirr")>; -def: InstRW<[HWWriteResGroup48], (instregex "MUL8r")>; -def: InstRW<[HWWriteResGroup48], (instregex "PDEP32rr")>; -def: InstRW<[HWWriteResGroup48], (instregex 
"PDEP64rr")>; -def: InstRW<[HWWriteResGroup48], (instregex "PEXT32rr")>; -def: InstRW<[HWWriteResGroup48], (instregex "PEXT64rr")>; -def: InstRW<[HWWriteResGroup48], (instregex "POPCNT32rr")>; -def: InstRW<[HWWriteResGroup48], (instregex "SHLD32rri8")>; -def: InstRW<[HWWriteResGroup48], (instregex "SHRD32rri8")>; -def: InstRW<[HWWriteResGroup48], (instregex "SUBPDrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "SUBPSrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "SUBSDrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "SUBSSrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "TZCNT32rr")>; -def: InstRW<[HWWriteResGroup48], (instregex "UCOMISDrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "UCOMISSrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VADDPDYrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VADDPDrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VADDPSYrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VADDPSrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VADDSDrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VADDSSrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VADDSUBPDYrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VADDSUBPDrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VADDSUBPSYrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VADDSUBPSrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VCMPPDYrri")>; -def: InstRW<[HWWriteResGroup48], (instregex "VCMPPDrri")>; -def: InstRW<[HWWriteResGroup48], (instregex "VCMPPSYrri")>; -def: InstRW<[HWWriteResGroup48], (instregex "VCMPPSrri")>; -def: InstRW<[HWWriteResGroup48], (instregex "VCMPSDrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VCMPSSrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VCOMISDrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VCOMISSrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VCVTDQ2PSYrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VCVTDQ2PSrr")>; -def: InstRW<[HWWriteResGroup48], (instregex 
"VCVTPS2DQYrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VCVTPS2DQrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VCVTTPS2DQrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VMAXPDYrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VMAXPDrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VMAXPSYrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VMAXPSrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VMAXSDrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VMAXSSrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VMINPDrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VMINPSrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VMINSDrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VMINSSrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VSUBPDYrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VSUBPDrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VSUBPSYrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VSUBPSrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VUCOMISDrr")>; -def: InstRW<[HWWriteResGroup48], (instregex "VUCOMISSrr")>; - -def HWWriteResGroup49 : SchedWriteRes<[HWPort5]> { - let Latency = 3; - let NumMicroOps = 1; - let ResourceCycles = [1]; +def : InstRW<[WritePopF], (instregex "POPF(16|32)")>; + +// POPA. 
+def WritePopA : SchedWriteRes<[]> { + let NumMicroOps = 18; } -def: InstRW<[HWWriteResGroup49], (instregex "KSHIFTRDri")>; -def: InstRW<[HWWriteResGroup49], (instregex "KSHIFTRWri")>; -def: InstRW<[HWWriteResGroup49], (instregex "VBROADCASTSDYrr")>; -def: InstRW<[HWWriteResGroup49], (instregex "VBROADCASTSSrr")>; -def: InstRW<[HWWriteResGroup49], (instregex "VEXTRACTF128rr")>; -def: InstRW<[HWWriteResGroup49], (instregex "VEXTRACTI128rr")>; -def: InstRW<[HWWriteResGroup49], (instregex "VINSERTF128rr")>; -def: InstRW<[HWWriteResGroup49], (instregex "VINSERTI128rr")>; -def: InstRW<[HWWriteResGroup49], (instregex "VPBROADCASTBYrr")>; -def: InstRW<[HWWriteResGroup49], (instregex "VPBROADCASTBrr")>; -def: InstRW<[HWWriteResGroup49], (instregex "VPBROADCASTDYrr")>; -def: InstRW<[HWWriteResGroup49], (instregex "VPBROADCASTQYrr")>; -def: InstRW<[HWWriteResGroup49], (instregex "VPBROADCASTWYrr")>; -def: InstRW<[HWWriteResGroup49], (instregex "VPBROADCASTWrr")>; -def: InstRW<[HWWriteResGroup49], (instregex "VPERM2I128rr")>; -def: InstRW<[HWWriteResGroup49], (instregex "VPERMDYrr")>; -def: InstRW<[HWWriteResGroup49], (instregex "VPERMQYri")>; -def: InstRW<[HWWriteResGroup49], (instregex "VPMOVSXBDYrr")>; -def: InstRW<[HWWriteResGroup49], (instregex "VPMOVSXBQYrr")>; -def: InstRW<[HWWriteResGroup49], (instregex "VPMOVSXBWYrr")>; -def: InstRW<[HWWriteResGroup49], (instregex "VPMOVSXDQYrr")>; -def: InstRW<[HWWriteResGroup49], (instregex "VPMOVSXWDYrr")>; -def: InstRW<[HWWriteResGroup49], (instregex "VPMOVSXWQYrr")>; -def: InstRW<[HWWriteResGroup49], (instregex "VPMOVZXBDYrr")>; -def: InstRW<[HWWriteResGroup49], (instregex "VPMOVZXBQYrr")>; -def: InstRW<[HWWriteResGroup49], (instregex "VPMOVZXBWYrr")>; -def: InstRW<[HWWriteResGroup49], (instregex "VPMOVZXDQYrr")>; -def: InstRW<[HWWriteResGroup49], (instregex "VPMOVZXWDYrr")>; -def: InstRW<[HWWriteResGroup49], (instregex "VPMOVZXWQYrr")>; - -def HWWriteResGroup50 : SchedWriteRes<[HWPort1,HWPort23]> { - let Latency = 3; +def : 
InstRW<[WritePopA], (instregex "POPA(16|32)")>; + +// LAHF SAHF. +def : InstRW<[WriteP06], (instregex "(S|L)AHF")>; + +// BSWAP. +// r32. +def WriteBSwap32 : SchedWriteRes<[HWPort15]>; +def : InstRW<[WriteBSwap32], (instregex "BSWAP32r")>; + +// r64. +def WriteBSwap64 : SchedWriteRes<[HWPort06, HWPort15]> { let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup50], (instregex "ADDPDrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "ADDPSrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "ADDSDrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "ADDSSrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "ADDSUBPDrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "ADDSUBPSrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "BSF64rm")>; -def: InstRW<[HWWriteResGroup50], (instregex "BSR64rm")>; -def: InstRW<[HWWriteResGroup50], (instregex "CMPPDrmi")>; -def: InstRW<[HWWriteResGroup50], (instregex "CMPPSrmi")>; -def: InstRW<[HWWriteResGroup50], (instregex "CMPSSrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "COMISDrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "COMISSrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "CVTDQ2PSrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "CVTPS2DQrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "CVTTPS2DQrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "IMUL64m")>; -def: InstRW<[HWWriteResGroup50], (instregex "IMUL64rm")>; -def: InstRW<[HWWriteResGroup50], (instregex "IMUL8m")>; -def: InstRW<[HWWriteResGroup50], (instregex "LZCNT64rm")>; -def: InstRW<[HWWriteResGroup50], (instregex "MAXPDrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "MAXPSrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "MAXSDrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "MAXSSrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "MINPDrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "MINPSrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "MINSDrm")>; -def: InstRW<[HWWriteResGroup50], (instregex 
"MINSSrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "MMX_CVTPI2PSirm")>; -def: InstRW<[HWWriteResGroup50], (instregex "MMX_CVTPS2PIirm")>; -def: InstRW<[HWWriteResGroup50], (instregex "MMX_CVTTPS2PIirm")>; -def: InstRW<[HWWriteResGroup50], (instregex "MUL64m")>; -def: InstRW<[HWWriteResGroup50], (instregex "MUL8m")>; -def: InstRW<[HWWriteResGroup50], (instregex "PDEP32rm")>; -def: InstRW<[HWWriteResGroup50], (instregex "PDEP64rm")>; -def: InstRW<[HWWriteResGroup50], (instregex "PEXT32rm")>; -def: InstRW<[HWWriteResGroup50], (instregex "PEXT64rm")>; -def: InstRW<[HWWriteResGroup50], (instregex "POPCNT64rm")>; -def: InstRW<[HWWriteResGroup50], (instregex "SUBPDrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "SUBPSrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "SUBSDrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "SUBSSrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "TZCNT64rm")>; -def: InstRW<[HWWriteResGroup50], (instregex "UCOMISDrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "UCOMISSrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VADDPDYrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VADDPDrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VADDPSYrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VADDPSrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VADDSDrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VADDSSrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VADDSUBPDYrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VADDSUBPDrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VADDSUBPSYrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VADDSUBPSrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VCMPPDYrmi")>; -def: InstRW<[HWWriteResGroup50], (instregex "VCMPPDrmi")>; -def: InstRW<[HWWriteResGroup50], (instregex "VCMPPSYrmi")>; -def: InstRW<[HWWriteResGroup50], (instregex "VCMPPSrmi")>; -def: InstRW<[HWWriteResGroup50], (instregex "VCMPSDrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VCMPSSrm")>; 
-def: InstRW<[HWWriteResGroup50], (instregex "VCOMISDrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VCOMISSrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VCVTDQ2PSYrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VCVTDQ2PSrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VCVTPS2DQYrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VCVTPS2DQrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VCVTTPS2DQrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VCVTTPS2DQrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VMAXPDYrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VMAXPDrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VMAXPSYrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VMAXPSrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VMAXSDrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VMAXSSrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VMINPDrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VMINPDrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VMINPSrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VMINPSrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VMINSDrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VMINSSrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VSUBPDYrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VSUBPDrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VSUBPSYrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VSUBPSrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VSUBSDrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VSUBSSrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VUCOMISDrm")>; -def: InstRW<[HWWriteResGroup50], (instregex "VUCOMISSrm")>; - -def HWWriteResGroup51 : SchedWriteRes<[HWPort5,HWPort23]> { - let Latency = 3; +} +def : InstRW<[WriteBSwap64], (instregex "BSWAP64r")>; + +// MOVBE. +// r16,m16 / r64,m64. +def : InstRW<[Write2P0156_Lat2Ld], (instregex "MOVBE(16|64)rm")>; + +// r32, m32. 
+def WriteMoveBE32rm : SchedWriteRes<[HWPort15, HWPort23]> { let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup51], (instregex "VPERM2F128rm")>; -def: InstRW<[HWWriteResGroup51], (instregex "VPERM2I128rm")>; -def: InstRW<[HWWriteResGroup51], (instregex "VPERMDYrm")>; -def: InstRW<[HWWriteResGroup51], (instregex "VPERMPDYmi")>; -def: InstRW<[HWWriteResGroup51], (instregex "VPERMPSYrm")>; -def: InstRW<[HWWriteResGroup51], (instregex "VPERMQYmi")>; -def: InstRW<[HWWriteResGroup51], (instregex "VPMOVSXBDYrm")>; -def: InstRW<[HWWriteResGroup51], (instregex "VPMOVSXBQYrm")>; -def: InstRW<[HWWriteResGroup51], (instregex "VPMOVSXBWYrm")>; -def: InstRW<[HWWriteResGroup51], (instregex "VPMOVSXDQYrm")>; -def: InstRW<[HWWriteResGroup51], (instregex "VPMOVSXWDYrm")>; -def: InstRW<[HWWriteResGroup51], (instregex "VPMOVSXWQYrm")>; -def: InstRW<[HWWriteResGroup51], (instregex "VPMOVZXBDYrm")>; -def: InstRW<[HWWriteResGroup51], (instregex "VPMOVZXBQYrm")>; -def: InstRW<[HWWriteResGroup51], (instregex "VPMOVZXBWYrm")>; -def: InstRW<[HWWriteResGroup51], (instregex "VPMOVZXDQYrm")>; -def: InstRW<[HWWriteResGroup51], (instregex "VPMOVZXWDYrm")>; -def: InstRW<[HWWriteResGroup51], (instregex "VPMOVZXWQYrm")>; - -def HWWriteResGroup52 : SchedWriteRes<[HWPort0156]> { - let Latency = 3; - let NumMicroOps = 3; - let ResourceCycles = [3]; } -def: InstRW<[HWWriteResGroup52], (instregex "XADD32rr")>; -def: InstRW<[HWWriteResGroup52], (instregex "XADD8rr")>; -def: InstRW<[HWWriteResGroup52], (instregex "XCHG8rr")>; +def : InstRW<[WriteMoveBE32rm], (instregex "MOVBE32rm")>; -def HWWriteResGroup53 : SchedWriteRes<[HWPort0,HWPort5]> { - let Latency = 3; +// m16,r16. 
+def WriteMoveBE16mr : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> { let NumMicroOps = 3; - let ResourceCycles = [2,1]; } -def: InstRW<[HWWriteResGroup53], (instregex "VPSLLVDYrr")>; -def: InstRW<[HWWriteResGroup53], (instregex "VPSLLVDrr")>; -def: InstRW<[HWWriteResGroup53], (instregex "VPSRAVDYrr")>; -def: InstRW<[HWWriteResGroup53], (instregex "VPSRAVDrr")>; -def: InstRW<[HWWriteResGroup53], (instregex "VPSRLVDYrr")>; -def: InstRW<[HWWriteResGroup53], (instregex "VPSRLVDrr")>; +def : InstRW<[WriteMoveBE16mr], (instregex "MOVBE16mr")>; -def HWWriteResGroup54 : SchedWriteRes<[HWPort5,HWPort15]> { - let Latency = 3; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[HWWriteResGroup54], (instregex "MMX_PHADDSWrr64")>; -def: InstRW<[HWWriteResGroup54], (instregex "MMX_PHADDWrr64")>; -def: InstRW<[HWWriteResGroup54], (instregex "MMX_PHADDrr64")>; -def: InstRW<[HWWriteResGroup54], (instregex "MMX_PHSUBDrr64")>; -def: InstRW<[HWWriteResGroup54], (instregex "MMX_PHSUBSWrr64")>; -def: InstRW<[HWWriteResGroup54], (instregex "MMX_PHSUBWrr64")>; -def: InstRW<[HWWriteResGroup54], (instregex "PHADDDrr")>; -def: InstRW<[HWWriteResGroup54], (instregex "PHADDSWrr128")>; -def: InstRW<[HWWriteResGroup54], (instregex "PHADDWrr")>; -def: InstRW<[HWWriteResGroup54], (instregex "PHSUBDrr")>; -def: InstRW<[HWWriteResGroup54], (instregex "PHSUBSWrr128")>; -def: InstRW<[HWWriteResGroup54], (instregex "PHSUBWrr")>; -def: InstRW<[HWWriteResGroup54], (instregex "VPHADDDYrr")>; -def: InstRW<[HWWriteResGroup54], (instregex "VPHADDDrr")>; -def: InstRW<[HWWriteResGroup54], (instregex "VPHADDSWrr128")>; -def: InstRW<[HWWriteResGroup54], (instregex "VPHADDSWrr256")>; -def: InstRW<[HWWriteResGroup54], (instregex "VPHADDWYrr")>; -def: InstRW<[HWWriteResGroup54], (instregex "VPHADDWrr")>; -def: InstRW<[HWWriteResGroup54], (instregex "VPHSUBDYrr")>; -def: InstRW<[HWWriteResGroup54], (instregex "VPHSUBDrr")>; -def: InstRW<[HWWriteResGroup54], (instregex "VPHSUBSWrr128")>; -def: 
InstRW<[HWWriteResGroup54], (instregex "VPHSUBSWrr256")>; -def: InstRW<[HWWriteResGroup54], (instregex "VPHSUBWYrr")>; -def: InstRW<[HWWriteResGroup54], (instregex "VPHSUBWrr")>; - -def HWWriteResGroup55 : SchedWriteRes<[HWPort5,HWPort0156]> { - let Latency = 3; +// m32,r32. +def WriteMoveBE32mr : SchedWriteRes<[HWPort15, HWPort237, HWPort4]> { let NumMicroOps = 3; - let ResourceCycles = [2,1]; } -def: InstRW<[HWWriteResGroup55], (instregex "MMX_PACKSSDWirr")>; -def: InstRW<[HWWriteResGroup55], (instregex "MMX_PACKSSWBirr")>; -def: InstRW<[HWWriteResGroup55], (instregex "MMX_PACKUSWBirr")>; +def : InstRW<[WriteMoveBE32mr], (instregex "MOVBE32mr")>; -def HWWriteResGroup56 : SchedWriteRes<[HWPort6,HWPort0156]> { - let Latency = 3; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; +// m64,r64. +def WriteMoveBE64mr : SchedWriteRes<[HWPort06, HWPort15, HWPort237, HWPort4]> { + let NumMicroOps = 4; } -def: InstRW<[HWWriteResGroup56], (instregex "CLD")>; +def : InstRW<[WriteMoveBE64mr], (instregex "MOVBE64mr")>; + +//-- Arithmetic instructions --// + +// ADD SUB. +// m,r/i. +def : InstRW<[Write2P0156_2P237_P4], + (instregex "(ADD|SUB)(8|16|32|64)m(r|i)", + "(ADD|SUB)(8|16|32|64)mi8", "(ADD|SUB)64mi32")>; + +// ADC SBB. +// r,r/i. +def : InstRW<[Write2P0156_Lat2], (instregex "(ADC|SBB)(8|16|32|64)r(r|i)", + "(ADC|SBB)(16|32|64)ri8", + "(ADC|SBB)64ri32", + "(ADC|SBB)(8|16|32|64)rr_REV")>; + +// r,m. +def : InstRW<[Write2P0156_Lat2Ld, ReadAfterLd], (instregex "(ADC|SBB)(8|16|32|64)rm")>; + +// m,r/i. +def : InstRW<[Write3P0156_2P237_P4], + (instregex "(ADC|SBB)(8|16|32|64)m(r|i)", + "(ADC|SBB)(16|32|64)mi8", + "(ADC|SBB)64mi32")>; + +// INC DEC NOT NEG. +// m. +def : InstRW<[WriteP0156_2P237_P4], + (instregex "(INC|DEC|NOT|NEG)(8|16|32|64)m", + "(INC|DEC)64(16|32)m")>; + +// MUL IMUL. +// r16. 
+def WriteMul16 : SchedWriteRes<[HWPort1, HWPort0156]> { + let Latency = 4; + let NumMicroOps = 4; +} +def : InstRW<[WriteMul16], (instregex "IMUL16r", "MUL16r")>; -def HWWriteResGroup57 : SchedWriteRes<[HWPort0,HWPort0156]> { - let Latency = 3; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[HWWriteResGroup57], (instregex "CMOVA32rr")>; -def: InstRW<[HWWriteResGroup57], (instregex "CMOVBE32rr")>; -def: InstRW<[HWWriteResGroup57], (instregex "RCL32ri")>; -def: InstRW<[HWWriteResGroup57], (instregex "RCL64r1")>; -def: InstRW<[HWWriteResGroup57], (instregex "RCL8r1")>; -def: InstRW<[HWWriteResGroup57], (instregex "RCL8ri")>; -def: InstRW<[HWWriteResGroup57], (instregex "RCR32ri")>; -def: InstRW<[HWWriteResGroup57], (instregex "RCR64r1")>; -def: InstRW<[HWWriteResGroup57], (instregex "RCR8r1")>; -def: InstRW<[HWWriteResGroup57], (instregex "RCR8ri")>; -def: InstRW<[HWWriteResGroup57], (instregex "SHL64rCL")>; -def: InstRW<[HWWriteResGroup57], (instregex "SHL8rCL")>; - -def HWWriteResGroup58 : SchedWriteRes<[HWPort0,HWPort4,HWPort237]> { - let Latency = 3; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; +// m16. +def WriteMul16Ld : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> { + let Latency = 8; + let NumMicroOps = 5; } -def: InstRW<[HWWriteResGroup58], (instregex "FNSTSWm")>; +def : InstRW<[WriteMul16Ld], (instregex "IMUL16m", "MUL16m")>; -def HWWriteResGroup59 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> { - let Latency = 3; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; +// r32. 
+def WriteMul32 : SchedWriteRes<[HWPort1, HWPort0156]> { + let Latency = 4; + let NumMicroOps = 3; } -def: InstRW<[HWWriteResGroup59], (instregex "VPSLLVDYrm")>; -def: InstRW<[HWWriteResGroup59], (instregex "VPSLLVDrm")>; -def: InstRW<[HWWriteResGroup59], (instregex "VPSRAVDYrm")>; -def: InstRW<[HWWriteResGroup59], (instregex "VPSRAVDrm")>; -def: InstRW<[HWWriteResGroup59], (instregex "VPSRLVDYrm")>; -def: InstRW<[HWWriteResGroup59], (instregex "VPSRLVDrm")>; +def : InstRW<[WriteMul32], (instregex "IMUL32r", "MUL32r")>; -def HWWriteResGroup60 : SchedWriteRes<[HWPort5,HWPort23,HWPort15]> { - let Latency = 3; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[HWWriteResGroup60], (instregex "MMX_PHADDSWrm64")>; -def: InstRW<[HWWriteResGroup60], (instregex "MMX_PHADDWrm64")>; -def: InstRW<[HWWriteResGroup60], (instregex "MMX_PHADDrm64")>; -def: InstRW<[HWWriteResGroup60], (instregex "MMX_PHSUBDrm64")>; -def: InstRW<[HWWriteResGroup60], (instregex "MMX_PHSUBSWrm64")>; -def: InstRW<[HWWriteResGroup60], (instregex "MMX_PHSUBWrm64")>; -def: InstRW<[HWWriteResGroup60], (instregex "PHADDDrm")>; -def: InstRW<[HWWriteResGroup60], (instregex "PHADDSWrm128")>; -def: InstRW<[HWWriteResGroup60], (instregex "PHADDWrm")>; -def: InstRW<[HWWriteResGroup60], (instregex "PHSUBDrm")>; -def: InstRW<[HWWriteResGroup60], (instregex "PHSUBSWrm128")>; -def: InstRW<[HWWriteResGroup60], (instregex "PHSUBWrm")>; -def: InstRW<[HWWriteResGroup60], (instregex "VPHADDDYrm")>; -def: InstRW<[HWWriteResGroup60], (instregex "VPHADDDrm")>; -def: InstRW<[HWWriteResGroup60], (instregex "VPHADDSWrm128")>; -def: InstRW<[HWWriteResGroup60], (instregex "VPHADDSWrm256")>; -def: InstRW<[HWWriteResGroup60], (instregex "VPHADDWYrm")>; -def: InstRW<[HWWriteResGroup60], (instregex "VPHADDWrm")>; -def: InstRW<[HWWriteResGroup60], (instregex "VPHSUBDYrm")>; -def: InstRW<[HWWriteResGroup60], (instregex "VPHSUBDrm")>; -def: InstRW<[HWWriteResGroup60], (instregex "VPHSUBSWrm128")>; -def: 
InstRW<[HWWriteResGroup60], (instregex "VPHSUBSWrm256")>; -def: InstRW<[HWWriteResGroup60], (instregex "VPHSUBWYrm")>; -def: InstRW<[HWWriteResGroup60], (instregex "VPHSUBWrm")>; - -def HWWriteResGroup61 : SchedWriteRes<[HWPort23,HWPort0,HWPort0156]> { - let Latency = 3; +// m32. +def WriteMul32Ld : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> { + let Latency = 8; let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; } -def: InstRW<[HWWriteResGroup61], (instregex "CMOVA64rm")>; -def: InstRW<[HWWriteResGroup61], (instregex "CMOVBE64rm")>; +def : InstRW<[WriteMul32Ld], (instregex "IMUL32m", "MUL32m")>; -def HWWriteResGroup62 : SchedWriteRes<[HWPort23,HWPort237,HWPort0,HWPort0156]> { - let Latency = 3; - let NumMicroOps = 5; - let ResourceCycles = [1,1,1,2]; -} -def: InstRW<[HWWriteResGroup62], (instregex "RCL64m1")>; -def: InstRW<[HWWriteResGroup62], (instregex "RCL64mi")>; -def: InstRW<[HWWriteResGroup62], (instregex "RCL8m1")>; -def: InstRW<[HWWriteResGroup62], (instregex "RCL8mi")>; -def: InstRW<[HWWriteResGroup62], (instregex "RCR64m1")>; -def: InstRW<[HWWriteResGroup62], (instregex "RCR64mi")>; -def: InstRW<[HWWriteResGroup62], (instregex "RCR8m1")>; -def: InstRW<[HWWriteResGroup62], (instregex "RCR8mi")>; - -def HWWriteResGroup63 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0156]> { - let Latency = 3; - let NumMicroOps = 6; - let ResourceCycles = [1,1,1,3]; -} -def: InstRW<[HWWriteResGroup63], (instregex "ADC64mi8")>; -def: InstRW<[HWWriteResGroup63], (instregex "ADC8mi")>; -def: InstRW<[HWWriteResGroup63], (instregex "ADD8mi")>; -def: InstRW<[HWWriteResGroup63], (instregex "AND8mi")>; -def: InstRW<[HWWriteResGroup63], (instregex "OR8mi")>; -def: InstRW<[HWWriteResGroup63], (instregex "SUB8mi")>; -def: InstRW<[HWWriteResGroup63], (instregex "XCHG64rm")>; -def: InstRW<[HWWriteResGroup63], (instregex "XCHG8rm")>; -def: InstRW<[HWWriteResGroup63], (instregex "XOR8mi")>; - -def HWWriteResGroup64 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0,HWPort0156]> 
{ +// r64. +def WriteMul64 : SchedWriteRes<[HWPort1, HWPort6]> { let Latency = 3; - let NumMicroOps = 6; - let ResourceCycles = [1,1,1,2,1]; -} -def: InstRW<[HWWriteResGroup64], (instregex "ADC64mr")>; -def: InstRW<[HWWriteResGroup64], (instregex "ADC8mr")>; -def: InstRW<[HWWriteResGroup64], (instregex "CMPXCHG64rm")>; -def: InstRW<[HWWriteResGroup64], (instregex "CMPXCHG8rm")>; -def: InstRW<[HWWriteResGroup64], (instregex "SBB64mi8")>; -def: InstRW<[HWWriteResGroup64], (instregex "SBB64mr")>; -def: InstRW<[HWWriteResGroup64], (instregex "SBB8mi")>; -def: InstRW<[HWWriteResGroup64], (instregex "SBB8mr")>; -def: InstRW<[HWWriteResGroup64], (instregex "SHL64mCL")>; -def: InstRW<[HWWriteResGroup64], (instregex "SHL8mCL")>; - -def HWWriteResGroup65 : SchedWriteRes<[HWPort0,HWPort1]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup65], (instregex "CVTSD2SI64rr")>; -def: InstRW<[HWWriteResGroup65], (instregex "CVTSD2SIrr")>; -def: InstRW<[HWWriteResGroup65], (instregex "CVTSS2SI64rr")>; -def: InstRW<[HWWriteResGroup65], (instregex "CVTSS2SIrr")>; -def: InstRW<[HWWriteResGroup65], (instregex "CVTTSD2SI64rr")>; -def: InstRW<[HWWriteResGroup65], (instregex "CVTTSD2SIrr")>; -def: InstRW<[HWWriteResGroup65], (instregex "CVTTSS2SI64rr")>; -def: InstRW<[HWWriteResGroup65], (instregex "CVTTSS2SIrr")>; -def: InstRW<[HWWriteResGroup65], (instregex "VCVTSD2SI64rr")>; -def: InstRW<[HWWriteResGroup65], (instregex "VCVTSS2SI64rr")>; -def: InstRW<[HWWriteResGroup65], (instregex "VCVTSS2SIrr")>; -def: InstRW<[HWWriteResGroup65], (instregex "VCVTTSD2SI64rr")>; -def: InstRW<[HWWriteResGroup65], (instregex "VCVTTSD2SIrr")>; -def: InstRW<[HWWriteResGroup65], (instregex "VCVTTSS2SI64rr")>; -def: InstRW<[HWWriteResGroup65], (instregex "VCVTTSS2SIrr")>; - -def HWWriteResGroup66 : SchedWriteRes<[HWPort0,HWPort5]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup66], (instregex 
"VCVTPS2PDYrr")>; -def: InstRW<[HWWriteResGroup66], (instregex "VPSLLDrr")>; -def: InstRW<[HWWriteResGroup66], (instregex "VPSLLQrr")>; -def: InstRW<[HWWriteResGroup66], (instregex "VPSLLWrr")>; -def: InstRW<[HWWriteResGroup66], (instregex "VPSRADYrr")>; -def: InstRW<[HWWriteResGroup66], (instregex "VPSRAWYrr")>; -def: InstRW<[HWWriteResGroup66], (instregex "VPSRLDYrr")>; -def: InstRW<[HWWriteResGroup66], (instregex "VPSRLQYrr")>; -def: InstRW<[HWWriteResGroup66], (instregex "VPSRLWYrr")>; -def: InstRW<[HWWriteResGroup66], (instregex "VPTESTYrr")>; - -def HWWriteResGroup67 : SchedWriteRes<[HWPort1,HWPort5]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup67], (instregex "CVTDQ2PDrr")>; -def: InstRW<[HWWriteResGroup67], (instregex "CVTPD2DQrr")>; -def: InstRW<[HWWriteResGroup67], (instregex "CVTPD2PSrr")>; -def: InstRW<[HWWriteResGroup67], (instregex "CVTSD2SSrr")>; -def: InstRW<[HWWriteResGroup67], (instregex "CVTSI2SD64rr")>; -def: InstRW<[HWWriteResGroup67], (instregex "CVTSI2SDrr")>; -def: InstRW<[HWWriteResGroup67], (instregex "CVTSI2SSrr")>; -def: InstRW<[HWWriteResGroup67], (instregex "CVTTPD2DQrr")>; -def: InstRW<[HWWriteResGroup67], (instregex "MMX_CVTPD2PIirr")>; -def: InstRW<[HWWriteResGroup67], (instregex "MMX_CVTPI2PDirr")>; -def: InstRW<[HWWriteResGroup67], (instregex "MMX_CVTPS2PIirr")>; -def: InstRW<[HWWriteResGroup67], (instregex "MMX_CVTTPD2PIirr")>; -def: InstRW<[HWWriteResGroup67], (instregex "MMX_CVTTPS2PIirr")>; -def: InstRW<[HWWriteResGroup67], (instregex "VCVTDQ2PDrr")>; -def: InstRW<[HWWriteResGroup67], (instregex "VCVTPD2DQrr")>; -def: InstRW<[HWWriteResGroup67], (instregex "VCVTPD2PSrr")>; -def: InstRW<[HWWriteResGroup67], (instregex "VCVTPS2PHrr")>; -def: InstRW<[HWWriteResGroup67], (instregex "VCVTSI2SD64rr")>; -def: InstRW<[HWWriteResGroup67], (instregex "VCVTSI2SDrr")>; -def: InstRW<[HWWriteResGroup67], (instregex "VCVTSI2SSrr")>; -def: InstRW<[HWWriteResGroup67], (instregex 
"VCVTTPD2DQrr")>; - -def HWWriteResGroup68 : SchedWriteRes<[HWPort1,HWPort6]> { - let Latency = 4; let NumMicroOps = 2; - let ResourceCycles = [1,1]; } -def: InstRW<[HWWriteResGroup68], (instregex "IMUL64r")>; -def: InstRW<[HWWriteResGroup68], (instregex "MUL64r")>; -def: InstRW<[HWWriteResGroup68], (instregex "MULX64rr")>; +def : InstRW<[WriteMul64], (instregex "IMUL64r", "MUL64r")>; -def HWWriteResGroup69 : SchedWriteRes<[HWPort0,HWPort1,HWPort23]> { - let Latency = 4; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[HWWriteResGroup69], (instregex "CVTSD2SI64rm")>; -def: InstRW<[HWWriteResGroup69], (instregex "CVTSD2SIrm")>; -def: InstRW<[HWWriteResGroup69], (instregex "CVTSS2SI64rm")>; -def: InstRW<[HWWriteResGroup69], (instregex "CVTSS2SIrm")>; -def: InstRW<[HWWriteResGroup69], (instregex "CVTTSD2SI64rm")>; -def: InstRW<[HWWriteResGroup69], (instregex "CVTTSD2SIrm")>; -def: InstRW<[HWWriteResGroup69], (instregex "CVTTSS2SIrm")>; -def: InstRW<[HWWriteResGroup69], (instregex "VCVTSD2SI64rm")>; -def: InstRW<[HWWriteResGroup69], (instregex "VCVTSD2SI64rr")>; -def: InstRW<[HWWriteResGroup69], (instregex "VCVTSS2SI64rm")>; -def: InstRW<[HWWriteResGroup69], (instregex "VCVTSS2SIrm")>; -def: InstRW<[HWWriteResGroup69], (instregex "VCVTTSD2SI64rm")>; -def: InstRW<[HWWriteResGroup69], (instregex "VCVTTSD2SI64rr")>; -def: InstRW<[HWWriteResGroup69], (instregex "VCVTTSS2SI64rm")>; -def: InstRW<[HWWriteResGroup69], (instregex "VCVTTSS2SIrm")>; - -def HWWriteResGroup70 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> { - let Latency = 4; +// m64. 
+def WriteMul64Ld : SchedWriteRes<[HWPort1, HWPort6, HWPort23]> { + let Latency = 7; let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; } -def: InstRW<[HWWriteResGroup70], (instregex "VCVTPS2PDYrm")>; -def: InstRW<[HWWriteResGroup70], (instregex "VPTESTYrm")>; +def : InstRW<[WriteMul64Ld], (instregex "IMUL64m", "MUL64m")>; -def HWWriteResGroup71 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> { - let Latency = 4; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[HWWriteResGroup71], (instregex "CVTDQ2PDrm")>; -def: InstRW<[HWWriteResGroup71], (instregex "CVTPD2DQrm")>; -def: InstRW<[HWWriteResGroup71], (instregex "CVTPD2PSrm")>; -def: InstRW<[HWWriteResGroup71], (instregex "CVTSD2SSrm")>; -def: InstRW<[HWWriteResGroup71], (instregex "CVTTPD2DQrm")>; -def: InstRW<[HWWriteResGroup71], (instregex "MMX_CVTPD2PIirm")>; -def: InstRW<[HWWriteResGroup71], (instregex "MMX_CVTPI2PDirm")>; -def: InstRW<[HWWriteResGroup71], (instregex "MMX_CVTTPD2PIirm")>; -def: InstRW<[HWWriteResGroup71], (instregex "VCVTDQ2PDrm")>; -def: InstRW<[HWWriteResGroup71], (instregex "VCVTSD2SSrm")>; - -def HWWriteResGroup72 : SchedWriteRes<[HWPort1,HWPort6,HWPort23]> { +// r16,r16. +def WriteMul16rri : SchedWriteRes<[HWPort1, HWPort0156]> { let Latency = 4; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; + let NumMicroOps = 2; } -def: InstRW<[HWWriteResGroup72], (instregex "MULX64rm")>; +def : InstRW<[WriteMul16rri], (instregex "IMUL16rri", "IMUL16rri8")>; -def HWWriteResGroup73 : SchedWriteRes<[HWPort5,HWPort23,HWPort015]> { - let Latency = 4; +// r16,m16. 
+def WriteMul16rmi : SchedWriteRes<[HWPort1, HWPort0156, HWPort23]> { + let Latency = 8; let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; } -def: InstRW<[HWWriteResGroup73], (instregex "VPBROADCASTBYrm")>; -def: InstRW<[HWWriteResGroup73], (instregex "VPBROADCASTBrm")>; -def: InstRW<[HWWriteResGroup73], (instregex "VPBROADCASTWYrm")>; -def: InstRW<[HWWriteResGroup73], (instregex "VPBROADCASTWrm")>; +def : InstRW<[WriteMul16rmi], (instregex "IMUL16rmi", "IMUL16rmi8")>; -def HWWriteResGroup74 : SchedWriteRes<[HWPort0156]> { +// MULX. +// r32,r32,r32. +def WriteMulX32 : SchedWriteRes<[HWPort1, HWPort056]> { let Latency = 4; - let NumMicroOps = 4; - let ResourceCycles = [4]; + let NumMicroOps = 3; + let ResourceCycles = [1, 2]; } -def: InstRW<[HWWriteResGroup74], (instregex "FNCLEX")>; +def : InstRW<[WriteMulX32], (instregex "MULX32rr")>; -def HWWriteResGroup75 : SchedWriteRes<[HWPort015,HWPort0156]> { - let Latency = 4; +// r32,r32,m32. +def WriteMulX32Ld : SchedWriteRes<[HWPort1, HWPort056, HWPort23]> { + let Latency = 8; let NumMicroOps = 4; - let ResourceCycles = [1,3]; + let ResourceCycles = [1, 2, 1]; } -def: InstRW<[HWWriteResGroup75], (instregex "VZEROUPPER")>; +def : InstRW<[WriteMulX32Ld], (instregex "MULX32rm")>; -def HWWriteResGroup76 : SchedWriteRes<[HWPort1,HWPort6,HWPort0156]> { +// r64,r64,r64. +def WriteMulX64 : SchedWriteRes<[HWPort1, HWPort6]> { let Latency = 4; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; + let NumMicroOps = 2; } -def: InstRW<[HWWriteResGroup76], (instregex "LAR32rr")>; +def : InstRW<[WriteMulX64], (instregex "MULX64rr")>; -def HWWriteResGroup77 : SchedWriteRes<[HWPort0,HWPort4,HWPort237,HWPort15]> { - let Latency = 4; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; +// r64,r64,m64. 
+def WriteMulX64Ld : SchedWriteRes<[HWPort1, HWPort6, HWPort23]> { + let Latency = 8; + let NumMicroOps = 3; } -def: InstRW<[HWWriteResGroup77], (instregex "VMASKMOVPDYrm")>; -def: InstRW<[HWWriteResGroup77], (instregex "VMASKMOVPDmr")>; -def: InstRW<[HWWriteResGroup77], (instregex "VMASKMOVPSmr")>; -def: InstRW<[HWWriteResGroup77], (instregex "VPMASKMOVDYmr")>; -def: InstRW<[HWWriteResGroup77], (instregex "VPMASKMOVDmr")>; -def: InstRW<[HWWriteResGroup77], (instregex "VPMASKMOVQYmr")>; -def: InstRW<[HWWriteResGroup77], (instregex "VPMASKMOVQmr")>; +def : InstRW<[WriteMulX64Ld], (instregex "MULX64rm")>; -def HWWriteResGroup78 : SchedWriteRes<[HWPort1,HWPort4,HWPort5,HWPort237]> { - let Latency = 4; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; +// DIV. +// r8. +def WriteDiv8 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> { + let Latency = 22; + let NumMicroOps = 9; } -def: InstRW<[HWWriteResGroup78], (instregex "VCVTPS2PHmr")>; +def : InstRW<[WriteDiv8], (instregex "DIV8r")>; -def HWWriteResGroup79 : SchedWriteRes<[HWPort1,HWPort23,HWPort237,HWPort0156]> { - let Latency = 4; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; +// r16. +def WriteDiv16 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> { + let Latency = 23; + let NumMicroOps = 10; } -def: InstRW<[HWWriteResGroup79], (instregex "SHLD64mri8")>; -def: InstRW<[HWWriteResGroup79], (instregex "SHRD64mri8")>; +def : InstRW<[WriteDiv16], (instregex "DIV16r")>; -def HWWriteResGroup80 : SchedWriteRes<[HWPort1,HWPort6,HWPort23,HWPort0156]> { +// r32. +def WriteDiv32 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> { + let Latency = 22; + let NumMicroOps = 10; +} +def : InstRW<[WriteDiv32], (instregex "DIV32r")>; + +// r64. +def WriteDiv64 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> { + let Latency = 32; + let NumMicroOps = 36; +} +def : InstRW<[WriteDiv64], (instregex "DIV64r")>; + +// IDIV. +// r8. 
+// Signed divide, 8-bit register form. Latency and micro-op count are
+// overridden below.
+def WriteIDiv8 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
+  let Latency = 23;
+  let NumMicroOps = 9;
+}
+def : InstRW<[WriteIDiv8], (instregex "IDIV8r")>;
+
+// r16.
+def WriteIDiv16 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
+  let Latency = 23;
+  let NumMicroOps = 10;
+}
+def : InstRW<[WriteIDiv16], (instregex "IDIV16r")>;
+
+// r32.
+def WriteIDiv32 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
+  let Latency = 22;
+  let NumMicroOps = 9;
+}
+def : InstRW<[WriteIDiv32], (instregex "IDIV32r")>;
+
+// r64.
+def WriteIDiv64 : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort6]> {
+  let Latency = 39;
+  let NumMicroOps = 59;
+}
+def : InstRW<[WriteIDiv64], (instregex "IDIV64r")>;
+
+//-- Logic instructions --//
+
+// AND OR XOR.
+// m,r/i.
+// Memory-destination forms; the three patterns cover the register/immediate,
+// 8-bit-immediate and 64-bit-with-32-bit-immediate opcode spellings.
+def : InstRW<[Write2P0156_2P237_P4],
+              (instregex "(AND|OR|XOR)(8|16|32|64)m(r|i)",
+              "(AND|OR|XOR)(8|16|32|64)mi8", "(AND|OR|XOR)64mi32")>;
+
+// SHR SHL SAR.
+// m,i.
+// The pattern S(A|H)(R|L) also admits the SAL spelling, and m(i|1) covers
+// both the immediate and the shift-by-one memory forms.
+def WriteShiftRMW : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
+  let NumMicroOps = 4;
+  // One entry per port group above, in the same order.
+  let ResourceCycles = [2, 1, 1];
+}
+def : InstRW<[WriteShiftRMW], (instregex "S(A|H)(R|L)(8|16|32|64)m(i|1)")>;
+
+// r,cl.
+def : InstRW<[Write3P06_Lat2], (instregex "S(A|H)(R|L)(8|16|32|64)rCL")>;
+
+// m,cl.
+def WriteShiftClLdRMW : SchedWriteRes<[HWPort06, HWPort23, HWPort4]> {
+  let NumMicroOps = 6;
+  // One entry per port group above, in the same order.
+  let ResourceCycles = [3, 2, 1];
+}
+def : InstRW<[WriteShiftClLdRMW], (instregex "S(A|H)(R|L)(8|16|32|64)mCL")>;
+
+// ROR ROL.
+// r,1.
+def : InstRW<[Write2P06], (instregex "RO(R|L)(8|16|32|64)r1")>;
+
+// m,i.
+def WriteRotateRMW : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> {
+  let NumMicroOps = 5;
+  // One entry per port group above, in the same order.
+  let ResourceCycles = [2, 2, 1];
+}
+def : InstRW<[WriteRotateRMW], (instregex "RO(R|L)(8|16|32|64)mi")>;
+
+// r,cl.
+def : InstRW<[Write3P06_Lat2], (instregex "RO(R|L)(8|16|32|64)rCL")>;
+
+// m,cl.
+def WriteRotateRMWCL : SchedWriteRes<[]> { + let NumMicroOps = 6; +} +def : InstRW<[WriteRotateRMWCL], (instregex "RO(R|L)(8|16|32|64)mCL")>; + +// RCR RCL. +// r,1. +def WriteRCr1 : SchedWriteRes<[HWPort06, HWPort0156]> { + let Latency = 2; + let NumMicroOps = 3; + let ResourceCycles = [2, 1]; +} +def : InstRW<[WriteRCr1], (instregex "RC(R|L)(8|16|32|64)r1")>; + +// m,1. +def WriteRCm1 : SchedWriteRes<[]> { + let NumMicroOps = 6; +} +def : InstRW<[WriteRCm1], (instregex "RC(R|L)(8|16|32|64)m1")>; + +// r,i. +def WriteRCri : SchedWriteRes<[HWPort0156]> { + let Latency = 6; + let NumMicroOps = 8; +} +def : InstRW<[WriteRCri], (instregex "RC(R|L)(8|16|32|64)r(i|CL)")>; + +// m,i. +def WriteRCmi : SchedWriteRes<[]> { + let NumMicroOps = 11; +} +def : InstRW<[WriteRCmi], (instregex "RC(R|L)(8|16|32|64)m(i|CL)")>; + +// SHRD SHLD. +// r,r,i. +def WriteShDrr : SchedWriteRes<[HWPort1]> { + let Latency = 3; +} +def : InstRW<[WriteShDrr], (instregex "SH(R|L)D(16|32|64)rri8")>; + +// m,r,i. +def WriteShDmr : SchedWriteRes<[]> { + let NumMicroOps = 5; +} +def : InstRW<[WriteShDmr], (instregex "SH(R|L)D(16|32|64)mri8")>; + +// r,r,cl. +def WriteShlDCL : SchedWriteRes<[HWPort0156]> { + let Latency = 3; + let NumMicroOps = 4; +} +def : InstRW<[WriteShlDCL], (instregex "SHLD(16|32|64)rrCL")>; + +// r,r,cl. +def WriteShrDCL : SchedWriteRes<[HWPort0156]> { + let Latency = 4; + let NumMicroOps = 4; +} +def : InstRW<[WriteShrDCL], (instregex "SHRD(16|32|64)rrCL")>; + +// m,r,cl. +def WriteShDmrCL : SchedWriteRes<[]> { + let NumMicroOps = 7; +} +def : InstRW<[WriteShDmrCL], (instregex "SH(R|L)D(16|32|64)mrCL")>; + +// BT. +// r,r/i. +def : InstRW<[WriteShift], (instregex "BT(16|32|64)r(r|i8)")>; + +// m,r. +def WriteBTmr : SchedWriteRes<[]> { + let NumMicroOps = 10; +} +def : InstRW<[WriteBTmr], (instregex "BT(16|32|64)mr")>; + +// m,i. +def : InstRW<[WriteShiftLd], (instregex "BT(16|32|64)mi8")>; + +// BTR BTS BTC. +// r,r,i. 
+def : InstRW<[WriteShift], (instregex "BT(R|S|C)(16|32|64)r(r|i8)")>; + +// m,r. +def WriteBTRSCmr : SchedWriteRes<[]> { + let NumMicroOps = 11; +} +def : InstRW<[WriteBTRSCmr], (instregex "BT(R|S|C)(16|32|64)mr")>; + +// m,i. +def : InstRW<[WriteShiftLd], (instregex "BT(R|S|C)(16|32|64)mi8")>; + +// BSF BSR. +// r,r. +def : InstRW<[WriteP1_Lat3], (instregex "BS(R|F)(16|32|64)rr")>; +// r,m. +def : InstRW<[WriteP1_Lat3Ld], (instregex "BS(R|F)(16|32|64)rm")>; + +// SETcc. +// r. +def : InstRW<[WriteShift], + (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)r")>; +// m. +def WriteSetCCm : SchedWriteRes<[HWPort06, HWPort237, HWPort4]> { + let NumMicroOps = 3; +} +def : InstRW<[WriteSetCCm], + (instregex "SET(O|NO|B|AE|E|NE|BE|A|S|NS|P|NP|L|GE|LE|G)m")>; + +// CLD STD. +def WriteCldStd : SchedWriteRes<[HWPort15, HWPort6]> { + let NumMicroOps = 3; +} +def : InstRW<[WriteCldStd], (instregex "STD", "CLD")>; + +// LZCNT TZCNT. +// r,r. +def : InstRW<[WriteP1_Lat3], (instregex "(L|T)ZCNT(16|32|64)rr")>; +// r,m. +def : InstRW<[WriteP1_Lat3Ld], (instregex "(L|T)ZCNT(16|32|64)rm")>; + +// ANDN. +// r,r. +def : InstRW<[WriteP15], (instregex "ANDN(32|64)rr")>; +// r,m. +def : InstRW<[WriteP15Ld], (instregex "ANDN(32|64)rm")>; + +// BLSI BLSMSK BLSR. +// r,r. +def : InstRW<[WriteP15], (instregex "BLS(I|MSK|R)(32|64)rr")>; +// r,m. +def : InstRW<[WriteP15Ld], (instregex "BLS(I|MSK|R)(32|64)rm")>; + +// BEXTR. +// r,r,r. +def : InstRW<[Write2P0156_Lat2], (instregex "BEXTR(32|64)rr")>; +// r,m,r. +def : InstRW<[Write2P0156_Lat2Ld], (instregex "BEXTR(32|64)rm")>; + +// BZHI. +// r,r,r. +def : InstRW<[WriteP15], (instregex "BZHI(32|64)rr")>; +// r,m,r. +def : InstRW<[WriteP15Ld], (instregex "BZHI(32|64)rm")>; + +// PDEP PEXT. +// r,r,r. +def : InstRW<[WriteP1_Lat3], (instregex "PDEP(32|64)rr", "PEXT(32|64)rr")>; +// r,m,r. +def : InstRW<[WriteP1_Lat3Ld], (instregex "PDEP(32|64)rm", "PEXT(32|64)rm")>; + +//-- Control transfer instructions --// + +// J(E|R)CXZ.
+def WriteJCXZ : SchedWriteRes<[HWPort0156, HWPort6]> { + let NumMicroOps = 2; +} +def : InstRW<[WriteJCXZ], (instregex "JCXZ", "JECXZ_(32|64)", "JRCXZ")>; + +// LOOP. +def WriteLOOP : SchedWriteRes<[]> { + let NumMicroOps = 7; +} +def : InstRW<[WriteLOOP], (instregex "LOOP")>; + +// LOOP(N)E +def WriteLOOPE : SchedWriteRes<[]> { + let NumMicroOps = 11; +} +def : InstRW<[WriteLOOPE], (instregex "LOOPE", "LOOPNE")>; + +// CALL. +// r. +def WriteCALLr : SchedWriteRes<[HWPort237, HWPort4, HWPort6]> { + let NumMicroOps = 3; +} +def : InstRW<[WriteCALLr], (instregex "CALL(16|32)r")>; + +// m. +def WriteCALLm : SchedWriteRes<[HWPort237, HWPort4, HWPort6]> { + let NumMicroOps = 4; + let ResourceCycles = [2, 1, 1]; +} +def : InstRW<[WriteCALLm], (instregex "CALL(16|32)m")>; + +// RET. +def WriteRET : SchedWriteRes<[HWPort237, HWPort6]> { + let NumMicroOps = 2; +} +def : InstRW<[WriteRET], (instregex "RET(L|Q|W)", "LRET(L|Q|W)")>; + +// i. +def WriteRETI : SchedWriteRes<[HWPort23, HWPort6, HWPort015]> { + let NumMicroOps = 4; + let ResourceCycles = [1, 2, 1]; +} +def : InstRW<[WriteRETI], (instregex "RETI(L|Q|W)", "LRETI(L|Q|W)")>; + +// BOUND. +// r,m. +def WriteBOUND : SchedWriteRes<[]> { + let NumMicroOps = 15; +} +def : InstRW<[WriteBOUND], (instregex "BOUNDS(16|32)rm")>; + +// INTO. +def WriteINTO : SchedWriteRes<[]> { + let NumMicroOps = 4; +} +def : InstRW<[WriteINTO], (instregex "INTO")>; + +//-- String instructions --// + +// LODSB/W. +def : InstRW<[Write2P0156_P23], (instregex "LODS(B|W)")>; + +// LODSD/Q. +def : InstRW<[WriteP0156_P23], (instregex "LODS(L|Q)")>; + +// STOS. +def WriteSTOS : SchedWriteRes<[HWPort23, HWPort0156, HWPort4]> { + let NumMicroOps = 3; +} +def : InstRW<[WriteSTOS], (instregex "STOS(B|L|Q|W)")>; + +// MOVS. 
+def WriteMOVS : SchedWriteRes<[HWPort23, HWPort4, HWPort0156]> { let Latency = 4; let NumMicroOps = 5; - let ResourceCycles = [1,2,1,1]; + let ResourceCycles = [2, 1, 2]; } -def: InstRW<[HWWriteResGroup80], (instregex "LAR32rm")>; -def: InstRW<[HWWriteResGroup80], (instregex "LSL32rm")>; +def : InstRW<[WriteMOVS], (instregex "MOVS(B|L|Q|W)")>; + +// SCAS. +def : InstRW<[Write2P0156_P23], (instregex "SCAS(B|W|L|Q)")>; -def HWWriteResGroup81 : SchedWriteRes<[HWPort4,HWPort237,HWPort0156]> { +// CMPS. +def WriteCMPS : SchedWriteRes<[HWPort23, HWPort0156]> { let Latency = 4; + let NumMicroOps = 5; + let ResourceCycles = [2, 3]; +} +def : InstRW<[WriteCMPS], (instregex "CMPS(B|L|Q|W)")>; + +//-- Synchronization instructions --// + +// XADD. +def WriteXADD : SchedWriteRes<[]> { + let NumMicroOps = 5; +} +def : InstRW<[WriteXADD], (instregex "XADD(8|16|32|64)rm")>; + +// CMPXCHG. +def WriteCMPXCHG : SchedWriteRes<[]> { let NumMicroOps = 6; - let ResourceCycles = [1,1,4]; } -def: InstRW<[HWWriteResGroup81], (instregex "PUSHF16")>; -def: InstRW<[HWWriteResGroup81], (instregex "PUSHF64")>; +def : InstRW<[WriteCMPXCHG], (instregex "CMPXCHG(8|16|32|64)rm")>; -def HWWriteResGroup82 : SchedWriteRes<[HWPort0]> { - let Latency = 5; - let NumMicroOps = 1; - let ResourceCycles = [1]; +// CMPXCHG8B. 
+def WriteCMPXCHG8B : SchedWriteRes<[]> { + let NumMicroOps = 15; } -def: InstRW<[HWWriteResGroup82], (instregex "MMX_PMADDUBSWrr64")>; -def: InstRW<[HWWriteResGroup82], (instregex "MMX_PMADDWDirr")>; -def: InstRW<[HWWriteResGroup82], (instregex "MMX_PMULHRSWrr64")>; -def: InstRW<[HWWriteResGroup82], (instregex "MMX_PMULHUWirr")>; -def: InstRW<[HWWriteResGroup82], (instregex "MMX_PMULHWirr")>; -def: InstRW<[HWWriteResGroup82], (instregex "MMX_PMULLWirr")>; -def: InstRW<[HWWriteResGroup82], (instregex "MMX_PMULUDQirr")>; -def: InstRW<[HWWriteResGroup82], (instregex "MMX_PSADBWirr")>; -def: InstRW<[HWWriteResGroup82], (instregex "PCMPGTQrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "PHMINPOSUWrr128")>; -def: InstRW<[HWWriteResGroup82], (instregex "PMADDUBSWrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "PMADDWDrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "PMULDQrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "PMULHRSWrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "PMULHUWrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "PMULHWrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "PMULLWrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "PMULUDQrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "PSADBWrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "RCPPSr")>; -def: InstRW<[HWWriteResGroup82], (instregex "RCPSSr")>; -def: InstRW<[HWWriteResGroup82], (instregex "RSQRTPSr")>; -def: InstRW<[HWWriteResGroup82], (instregex "RSQRTSSr")>; -def: InstRW<[HWWriteResGroup82], (instregex "VMOVMSKPSYrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "VPCMPGTQYrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "VPCMPGTQrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "VPHMINPOSUWrr128")>; -def: InstRW<[HWWriteResGroup82], (instregex "VPMADDUBSWrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "VPMADDUBSWrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "VPMADDWDYrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "VPMADDWDrr")>; -def: 
InstRW<[HWWriteResGroup82], (instregex "VPMULDQYrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "VPMULDQrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "VPMULHRSWYrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "VPMULHRSWrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "VPMULHUWrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "VPMULHWYrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "VPMULHWrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "VPMULLWYrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "VPMULLWrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "VPMULUDQrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "VPSADBWYrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "VPSADBWrr")>; -def: InstRW<[HWWriteResGroup82], (instregex "VRSQRTPSr")>; -def: InstRW<[HWWriteResGroup82], (instregex "VRSQRTSSr")>; - -def HWWriteResGroup83 : SchedWriteRes<[HWPort01]> { - let Latency = 5; +def : InstRW<[WriteCMPXCHG8B], (instregex "CMPXCHG8B")>; + +// CMPXCHG16B. +def WriteCMPXCHG16B : SchedWriteRes<[]> { + let NumMicroOps = 22; +} +def : InstRW<[WriteCMPXCHG16B], (instregex "CMPXCHG16B")>; + +//-- Other --// + +// PAUSE. +def WritePAUSE : SchedWriteRes<[HWPort05, HWPort6]> { + let NumMicroOps = 5; + let ResourceCycles = [1, 3]; +} +def : InstRW<[WritePAUSE], (instregex "PAUSE")>; + +// LEAVE. +def : InstRW<[Write2P0156_P23], (instregex "LEAVE")>; + +// XGETBV. +def WriteXGETBV : SchedWriteRes<[]> { + let NumMicroOps = 8; +} +def : InstRW<[WriteXGETBV], (instregex "XGETBV")>; + +// RDTSC. +def WriteRDTSC : SchedWriteRes<[]> { + let NumMicroOps = 15; +} +def : InstRW<[WriteRDTSC], (instregex "RDTSC")>; + +// RDPMC. +def WriteRDPMC : SchedWriteRes<[]> { + let NumMicroOps = 34; +} +def : InstRW<[WriteRDPMC], (instregex "RDPMC")>; + +// RDRAND. 
+def WriteRDRAND : SchedWriteRes<[HWPort23, HWPort015]> { + let NumMicroOps = 17; + let ResourceCycles = [1, 16]; +} +def : InstRW<[WriteRDRAND], (instregex "RDRAND(16|32|64)r")>; + +//=== Floating Point x87 Instructions ===// +//-- Move instructions --// + +// FLD. +// r. +def : InstRW<[WriteP01], (instregex "LD_Frr")>; + +def WriteLD_F80m : SchedWriteRes<[HWPort01, HWPort23]> { + let Latency = 4; + let NumMicroOps = 4; + let ResourceCycles = [2, 2]; +} +def : InstRW<[WriteLD_F80m], (instregex "LD_F80m")>; + +// FBLD. +// m80. +def WriteFBLD : SchedWriteRes<[]> { + let Latency = 47; + let NumMicroOps = 43; +} +def : InstRW<[WriteFBLD], (instregex "FBLDm")>; + +// FST(P). +// r. +def : InstRW<[WriteP01], (instregex "ST_(F|FP)rr")>; + +// m80. +def WriteST_FP80m : SchedWriteRes<[HWPort0156, HWPort23, HWPort4]> { + let NumMicroOps = 7; + let ResourceCycles = [3, 2, 2]; +} +def : InstRW<[WriteST_FP80m], (instregex "ST_FP80m")>; + +// FBSTP. +// m80. +def WriteFBSTP : SchedWriteRes<[]> { + let NumMicroOps = 226; +} +def : InstRW<[WriteFBSTP], (instregex "FBSTPm")>; + +// FXCHG. +def : InstRW<[WriteNop], (instregex "XCH_F")>; + +// FILD. +def WriteFILD : SchedWriteRes<[HWPort01, HWPort23]> { + let Latency = 6; + let NumMicroOps = 2; +} +def : InstRW<[WriteFILD], (instregex "ILD_F(16|32|64)m")>; + +// FIST(P) FISTTP. +def WriteFIST : SchedWriteRes<[HWPort1, HWPort23, HWPort4]> { + let Latency = 7; + let NumMicroOps = 3; +} +def : InstRW<[WriteFIST], (instregex "IST_(F|FP)(16|32)m")>; + +// FLDZ. +def : InstRW<[WriteP01], (instregex "LD_F0")>; + +// FLD1. +def : InstRW<[Write2P01], (instregex "LD_F1")>; + +// FLDPI FLDL2E etc. +def : InstRW<[Write2P01], (instregex "FLDPI", "FLDL2(T|E)", "FLDL(G|N)2")>; + +// FCMOVcc. +def WriteFCMOVcc : SchedWriteRes<[HWPort0, HWPort5]> { + let Latency = 2; + let NumMicroOps = 3; + let ResourceCycles = [2, 1]; +} +def : InstRW<[WriteFCMOVcc], (instregex "CMOV(B|BE|P|NB|NBE|NE|NP)_F")>; + +// FNSTSW. +// AX.
+def WriteFNSTSW : SchedWriteRes<[HWPort0, HWPort0156]> { + let NumMicroOps = 2; +} +def : InstRW<[WriteFNSTSW], (instregex "FNSTSW16r")>; + +// m16. +def WriteFNSTSWm : SchedWriteRes<[HWPort0, HWPort4, HWPort237]> { + let Latency = 6; + let NumMicroOps = 3; +} +def : InstRW<[WriteFNSTSWm], (instregex "FNSTSWm")>; + +// FLDCW. +def WriteFLDCW : SchedWriteRes<[HWPort01, HWPort23, HWPort6]> { + let Latency = 7; + let NumMicroOps = 3; +} +def : InstRW<[WriteFLDCW], (instregex "FLDCW16m")>; + +// FNSTCW. +def WriteFNSTCW : SchedWriteRes<[HWPort237, HWPort4, HWPort6]> { + let NumMicroOps = 3; +} +def : InstRW<[WriteFNSTCW], (instregex "FNSTCW16m")>; + +// FINCSTP FDECSTP. +def : InstRW<[WriteP01], (instregex "FINCSTP", "FDECSTP")>; + +// FFREE. +def : InstRW<[WriteP01], (instregex "FFREE")>; + +// FNSAVE. +def WriteFNSAVE : SchedWriteRes<[]> { + let NumMicroOps = 147; +} +def : InstRW<[WriteFNSAVE], (instregex "FSAVEm")>; + +// FRSTOR. +def WriteFRSTOR : SchedWriteRes<[]> { + let NumMicroOps = 90; +} +def : InstRW<[WriteFRSTOR], (instregex "FRSTORm")>; + +//-- Arithmetic instructions --// + +// FABS. +def : InstRW<[WriteP0], (instregex "ABS_F")>; + +// FCHS. +def : InstRW<[WriteP0], (instregex "CHS_F")>; + +// FCOM(P) FUCOM(P). +// r. +def : InstRW<[WriteP1], (instregex "COM_FST0r", "COMP_FST0r", "UCOM_Fr", + "UCOM_FPr")>; +// m. +def : InstRW<[WriteP1_P23], (instregex "FCOM(32|64)m", "FCOMP(32|64)m")>; + +// FCOMPP FUCOMPP. +// r. +def : InstRW<[Write2P01], (instregex "FCOMPP", "UCOM_FPPr")>; + +// FCOMI(P) FUCOMI(P). +// r. +def : InstRW<[Write3P01], (instregex "COM_FIr", "COM_FIPr", "UCOM_FIr", + "UCOM_FIPr")>; + +// FICOM(P). +def : InstRW<[Write2P1_P23], (instregex "FICOM(16|32)m", "FICOMP(16|32)m")>; + +// FTST. +def : InstRW<[WriteP1], (instregex "TST_F")>; + +// FXAM. +def : InstRW<[Write2P1], (instregex "FXAM")>; + +// FPREM.
+def WriteFPREM : SchedWriteRes<[]> { + let Latency = 19; + let NumMicroOps = 28; +} +def : InstRW<[WriteFPREM], (instregex "FPREM")>; + +// FPREM1. +def WriteFPREM1 : SchedWriteRes<[]> { + let Latency = 27; + let NumMicroOps = 41; +} +def : InstRW<[WriteFPREM1], (instregex "FPREM1")>; + +// FRNDINT. +def WriteFRNDINT : SchedWriteRes<[]> { + let Latency = 11; + let NumMicroOps = 17; +} +def : InstRW<[WriteFRNDINT], (instregex "FRNDINT")>; + +//-- Math instructions --// + +// FSCALE. +def WriteFSCALE : SchedWriteRes<[]> { + let Latency = 75; // 49-125 + let NumMicroOps = 50; // 25-75 +} +def : InstRW<[WriteFSCALE], (instregex "FSCALE")>; + +// FXTRACT. +def WriteFXTRACT : SchedWriteRes<[]> { + let Latency = 15; + let NumMicroOps = 17; +} +def : InstRW<[WriteFXTRACT], (instregex "FXTRACT")>; + +//-- Other instructions --// + +// FNOP. +def : InstRW<[WriteP01], (instregex "FNOP")>; + +// WAIT. +def : InstRW<[Write2P01], (instregex "WAIT")>; + +// FNCLEX. +def : InstRW<[Write5P0156], (instregex "FNCLEX")>; + +// FNINIT. +def WriteFNINIT : SchedWriteRes<[]> { + let NumMicroOps = 26; +} +def : InstRW<[WriteFNINIT], (instregex "FNINIT")>; + +//=== Integer MMX and XMM Instructions ===// +//-- Move instructions --// + +// MOVD. +// r32/64 <- (x)mm. +def : InstRW<[WriteP0], (instregex "MMX_MOVD64grr", "MMX_MOVD64from64rr", + "VMOVPDI2DIrr", "MOVPDI2DIrr")>; + +// (x)mm <- r32/64. +def : InstRW<[WriteP5], (instregex "MMX_MOVD64rr", "MMX_MOVD64to64rr", + "VMOVDI2PDIrr", "MOVDI2PDIrr")>; + +// MOVQ. +// r64 <- (x)mm. +def : InstRW<[WriteP0], (instregex "VMOVPQIto64rr")>; + +// (x)mm <- r64. +def : InstRW<[WriteP5], (instregex "VMOV64toPQIrr", "VMOVZQI2PQIrr")>; + +// (x)mm <- (x)mm. +def : InstRW<[WriteP015], (instregex "MMX_MOVQ64rr")>; + +// (V)MOVDQA/U. +// x <- x. +def : InstRW<[WriteP015], (instregex "MOVDQ(A|U)rr", "VMOVDQ(A|U)rr", + "MOVDQ(A|U)rr_REV", "VMOVDQ(A|U)rr_REV", + "VMOVDQ(A|U)Yrr", "VMOVDQ(A|U)Yrr_REV")>; + +// MOVDQ2Q. 
+def : InstRW<[WriteP01_P5], (instregex "MMX_MOVDQ2Qrr")>; + +// MOVQ2DQ. +def : InstRW<[WriteP015], (instregex "MMX_MOVQ2DQrr")>; + + +// PACKSSWB/DW. +// mm <- mm. +def WriteMMXPACKSSrr : SchedWriteRes<[HWPort5]> { + let Latency = 2; + let NumMicroOps = 3; + let ResourceCycles = [3]; +} +def : InstRW<[WriteMMXPACKSSrr], (instregex "MMX_PACKSSDWirr", + "MMX_PACKSSWBirr", "MMX_PACKUSWBirr")>; + +// mm <- m64. +def WriteMMXPACKSSrm : SchedWriteRes<[HWPort23, HWPort5]> { + let Latency = 4; + let NumMicroOps = 3; + let ResourceCycles = [1, 3]; +} +def : InstRW<[WriteMMXPACKSSrm], (instregex "MMX_PACKSSDWirm", + "MMX_PACKSSWBirm", "MMX_PACKUSWBirm")>; + +// VPMOVSX/ZX BW BD BQ DW DQ. +// y <- x. +def WriteVPMOVSX : SchedWriteRes<[HWPort5]> { + let Latency = 3; let NumMicroOps = 1; - let ResourceCycles = [1]; } -def: InstRW<[HWWriteResGroup83], (instregex "MULPDrr")>; -def: InstRW<[HWWriteResGroup83], (instregex "MULPSrr")>; -def: InstRW<[HWWriteResGroup83], (instregex "MULSDrr")>; -def: InstRW<[HWWriteResGroup83], (instregex "MULSSrr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADD132PDYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADD132PDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADD132PSYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADD132PSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADD132SDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADD132SSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADD213PDYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADD213PDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADD213PSYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADD213PSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADD213SDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADD213SSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADD231PDYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADD231PDr")>; -def: InstRW<[HWWriteResGroup83], (instregex 
"VFMADD231PSYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADD231PSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADD231SDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADD231SSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADDSUB132PDYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADDSUB132PDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADDSUB132PSYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADDSUB132PSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADDSUB213PDYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADDSUB213PDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADDSUB213PSYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADDSUB213PSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADDSUB231PDYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADDSUB231PDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADDSUB231PSYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMADDSUB231PSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB132PDYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB132PDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB132PSYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB132PSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB132SDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB132SSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB213PDYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB213PDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB213PSYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB213PSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB213SDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB213SSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB231PDYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB231PDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB231PSYr")>; -def: InstRW<[HWWriteResGroup83], 
(instregex "VFMSUB231PSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB231SDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUB231SSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUBADD132PDYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUBADD132PDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUBADD132PSYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUBADD132PSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUBADD213PDYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUBADD213PDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUBADD213PSYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUBADD213PSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUBADD231PDYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUBADD231PDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUBADD231PSYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFMSUBADD231PSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD132PDYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD132PDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD132PSYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD132PSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD132SDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD132SSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD213PDYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD213PDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD213PSYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD213PSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD213SDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD213SSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD231PDYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD231PDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD231PSYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD231PSr")>; -def: 
InstRW<[HWWriteResGroup83], (instregex "VFNMADD231SDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMADD231SSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB132PDYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB132PDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB132PSYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB132PSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB132SDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB132SSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB213PDYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB213PDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB213PSYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB213PSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB213SDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB213SSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB231PDYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB231PDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB231PSYr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB231PSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB231SDr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VFNMSUB231SSr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VMULPDYrr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VMULPDrr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VMULPSYrr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VMULPSrr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VMULSDrr")>; -def: InstRW<[HWWriteResGroup83], (instregex "VMULSSrr")>; - -def HWWriteResGroup84 : SchedWriteRes<[HWPort0,HWPort23]> { - let Latency = 5; +def : InstRW<[WriteVPMOVSX], (instregex "VPMOV(SX|ZX)(BW|BQ|DW|DQ)Yrr")>; + +// PBLENDW. 
+// x,x,i / v,v,v,i +def WritePBLENDWr : SchedWriteRes<[HWPort5]>; +def : InstRW<[WritePBLENDWr], (instregex "(V?)PBLENDW(Y?)rri")>; + +// x,m,i / v,v,m,i +def WritePBLENDWm : SchedWriteRes<[HWPort5, HWPort23]> { let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup84], (instregex "MMX_PMADDUBSWrm64")>; -def: InstRW<[HWWriteResGroup84], (instregex "MMX_PMADDWDirm")>; -def: InstRW<[HWWriteResGroup84], (instregex "MMX_PMULHRSWrm64")>; -def: InstRW<[HWWriteResGroup84], (instregex "MMX_PMULHUWirm")>; -def: InstRW<[HWWriteResGroup84], (instregex "MMX_PMULHWirm")>; -def: InstRW<[HWWriteResGroup84], (instregex "MMX_PMULLWirm")>; -def: InstRW<[HWWriteResGroup84], (instregex "MMX_PMULUDQirm")>; -def: InstRW<[HWWriteResGroup84], (instregex "MMX_PSADBWirm")>; -def: InstRW<[HWWriteResGroup84], (instregex "PCMPGTQrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "PHMINPOSUWrm128")>; -def: InstRW<[HWWriteResGroup84], (instregex "PMADDUBSWrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "PMADDWDrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "PMULDQrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "PMULHRSWrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "PMULHUWrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "PMULHWrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "PMULLWrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "PMULUDQrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "PSADBWrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "RCPPSm")>; -def: InstRW<[HWWriteResGroup84], (instregex "RCPSSm")>; -def: InstRW<[HWWriteResGroup84], (instregex "RSQRTPSm")>; -def: InstRW<[HWWriteResGroup84], (instregex "RSQRTSSm")>; -def: InstRW<[HWWriteResGroup84], (instregex "VPCMPGTQYrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "VPCMPGTQrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "VPHMINPOSUWrm128")>; -def: InstRW<[HWWriteResGroup84], (instregex "VPMADDUBSWrm")>; -def: InstRW<[HWWriteResGroup84], (instregex 
"VPMADDUBSWrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "VPMADDWDYrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "VPMADDWDrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "VPMULDQYrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "VPMULDQrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "VPMULHRSWYrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "VPMULHRSWrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "VPMULHUWrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "VPMULHUWrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "VPMULHWYrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "VPMULHWrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "VPMULLWYrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "VPMULLWrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "VPMULUDQrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "VPMULUDQrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "VPSADBWYrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "VPSADBWrm")>; -def: InstRW<[HWWriteResGroup84], (instregex "VRCPPSm(_Int)?")>; -def: InstRW<[HWWriteResGroup84], (instregex "VRCPSSm")>; -def: InstRW<[HWWriteResGroup84], (instregex "VRSQRTPSm")>; -def: InstRW<[HWWriteResGroup84], (instregex "VRSQRTSSm")>; - -def HWWriteResGroup85 : SchedWriteRes<[HWPort01,HWPort23]> { - let Latency = 5; + let Latency = 4; + let ResourceCycles = [1, 1]; +} +def : InstRW<[WritePBLENDWm, ReadAfterLd], (instregex "(V?)PBLENDW(Y?)rmi")>; + +// VPBLENDD. +// v,v,v,i. +def WriteVPBLENDDr : SchedWriteRes<[HWPort015]>; +def : InstRW<[WriteVPBLENDDr], (instregex "VPBLENDD(Y?)rri")>; + +// v,v,m,i +def WriteVPBLENDDm : SchedWriteRes<[HWPort015, HWPort23]> { + let NumMicroOps = 2; + let Latency = 4; + let ResourceCycles = [1, 1]; +} +def : InstRW<[WriteVPBLENDDm, ReadAfterLd], (instregex "VPBLENDD(Y?)rmi")>; + +// MASKMOVQ. 
+def WriteMASKMOVQ : SchedWriteRes<[HWPort0, HWPort4, HWPort23]> { + let Latency = 13; + let NumMicroOps = 4; + let ResourceCycles = [1, 1, 2]; +} +def : InstRW<[WriteMASKMOVQ], (instregex "MMX_MASKMOVQ(64)?")>; + +// MASKMOVDQU. +def WriteMASKMOVDQU : SchedWriteRes<[HWPort04, HWPort56, HWPort23]> { + let Latency = 14; + let NumMicroOps = 10; + let ResourceCycles = [4, 2, 4]; +} +def : InstRW<[WriteMASKMOVDQU], (instregex "(V?)MASKMOVDQU(64)?")>; + +// VPMASKMOV D/Q. +// v,v,m. +def WriteVPMASKMOVr : SchedWriteRes<[HWPort5, HWPort23]> { + let Latency = 4; + let NumMicroOps = 3; + let ResourceCycles = [2, 1]; +} +def : InstRW<[WriteVPMASKMOVr, ReadAfterLd], + (instregex "VPMASKMOV(D|Q)(Y?)rm")>; + +// m, v,v. +def WriteVPMASKMOVm : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> { + let Latency = 13; + let NumMicroOps = 4; + let ResourceCycles = [1, 1, 1, 1]; +} +def : InstRW<[WriteVPMASKMOVm], (instregex "VPMASKMOV(D|Q)(Y?)mr")>; + +// PMOVMSKB. +def WritePMOVMSKB : SchedWriteRes<[HWPort0]> { + let Latency = 3; +} +def : InstRW<[WritePMOVMSKB], (instregex "(V|MMX_)?PMOVMSKB(Y?)rr")>; + +// PEXTR B/W/D/Q. +// r32,x,i. 
+def WritePEXTRr : SchedWriteRes<[HWPort0, HWPort5]> { + let Latency = 2; let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup85], (instregex "MULPDrm")>; -def: InstRW<[HWWriteResGroup85], (instregex "MULPSrm")>; -def: InstRW<[HWWriteResGroup85], (instregex "MULSDrm")>; -def: InstRW<[HWWriteResGroup85], (instregex "MULSSrm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADD132PDYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADD132PDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADD132PSYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADD132PSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADD132SDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADD132SSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADD213PDYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADD213PDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADD213PSYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADD213PSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADD213SDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADD213SSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADD231PDYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADD231PDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADD231PSYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADD231PSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADD231SDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADD231SSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADDSUB132PDYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADDSUB132PDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADDSUB132PSYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADDSUB132PSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADDSUB213PDYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADDSUB213PDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADDSUB213PSYm")>; -def: InstRW<[HWWriteResGroup85], 
(instregex "VFMADDSUB213PSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADDSUB231PDYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADDSUB231PDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADDSUB231PSYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMADDSUB231PSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB132PDYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB132PDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB132PSYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB132PSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB132SDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB132SSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB213PDYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB213PDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB213PSYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB213PSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB213SDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB213SSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB231PDYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB231PDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB231PSYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB231PSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB231SDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUB231SSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUBADD132PDYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUBADD132PDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUBADD132PSYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUBADD132PSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUBADD213PDYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUBADD213PDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUBADD213PSYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUBADD213PSm")>; -def: 
InstRW<[HWWriteResGroup85], (instregex "VFMSUBADD231PDYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUBADD231PDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUBADD231PSYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFMSUBADD231PSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD132PDYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD132PDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD132PSYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD132PSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD132SDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD132SSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD213PDYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD213PDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD213PSYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD213PSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD213SDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD213SSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD231PDYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD231PDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD231PSYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD231PSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD231SDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMADD231SSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB132PDYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB132PDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB132PSYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB132PSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB132SDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB132SSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB213PDYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB213PDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB213PSYm")>; 
-def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB213PSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB213SDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB213SSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB231PDYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB231PDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB231PSYm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB231PSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB231SDm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VFNMSUB231SSm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VMULPDYrm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VMULPDrm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VMULPSYrm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VMULPSrm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VMULSDrm")>; -def: InstRW<[HWWriteResGroup85], (instregex "VMULSSrm")>; - -def HWWriteResGroup86 : SchedWriteRes<[HWPort1,HWPort5]> { + let ResourceCycles = [1, 1]; +} +def : InstRW<[WritePEXTRr], (instregex "PEXTR(B|W|D|Q)rr", "MMX_PEXTRWirri")>; + +// m8,x,i. +def WritePEXTRm : SchedWriteRes<[HWPort23, HWPort4, HWPort5]> { + let NumMicroOps = 3; + let ResourceCycles = [1, 1, 1]; +} +def : InstRW<[WritePEXTRm], (instregex "PEXTR(B|W|D|Q)mr")>; + +// VPBROADCAST B/W. +// x, m8/16. 
+def WriteVPBROADCAST128Ld : SchedWriteRes<[HWPort01, HWPort23, HWPort5]> { let Latency = 5; let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[HWWriteResGroup86], (instregex "CVTSI2SS64rr")>; -def: InstRW<[HWWriteResGroup86], (instregex "HADDPDrr")>; -def: InstRW<[HWWriteResGroup86], (instregex "HADDPSrr")>; -def: InstRW<[HWWriteResGroup86], (instregex "HSUBPDrr")>; -def: InstRW<[HWWriteResGroup86], (instregex "HSUBPSrr")>; -def: InstRW<[HWWriteResGroup86], (instregex "VCVTSI2SS64rr")>; -def: InstRW<[HWWriteResGroup86], (instregex "VHADDPDrr")>; -def: InstRW<[HWWriteResGroup86], (instregex "VHADDPSYrr")>; -def: InstRW<[HWWriteResGroup86], (instregex "VHADDPSrr")>; -def: InstRW<[HWWriteResGroup86], (instregex "VHSUBPDYrr")>; -def: InstRW<[HWWriteResGroup86], (instregex "VHSUBPDrr")>; -def: InstRW<[HWWriteResGroup86], (instregex "VHSUBPSYrr")>; -def: InstRW<[HWWriteResGroup86], (instregex "VHSUBPSrr")>; - -def HWWriteResGroup87 : SchedWriteRes<[HWPort1,HWPort6,HWPort0]> { + let ResourceCycles = [1, 1, 1]; +} +def : InstRW<[WriteVPBROADCAST128Ld, ReadAfterLd], + (instregex "VPBROADCAST(B|W)rm")>; + +// y, m8/16 +def WriteVPBROADCAST256Ld : SchedWriteRes<[HWPort01, HWPort23, HWPort5]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [1, 1, 1]; +} +def : InstRW<[WriteVPBROADCAST256Ld, ReadAfterLd], + (instregex "VPBROADCAST(B|W)Yrm")>; + +// VPGATHERDD. +// x. +def WriteVPGATHERDD128 : SchedWriteRes<[]> { + let NumMicroOps = 20; +} +def : InstRW<[WriteVPGATHERDD128, ReadAfterLd], (instregex "VPGATHERDDrm")>; + +// y. +def WriteVPGATHERDD256 : SchedWriteRes<[]> { + let NumMicroOps = 34; +} +def : InstRW<[WriteVPGATHERDD256, ReadAfterLd], (instregex "VPGATHERDDYrm")>; + +// VPGATHERQD. +// x. +def WriteVPGATHERQD128 : SchedWriteRes<[]> { + let NumMicroOps = 15; +} +def : InstRW<[WriteVPGATHERQD128, ReadAfterLd], (instregex "VPGATHERQDrm")>; + +// y. 
+def WriteVPGATHERQD256 : SchedWriteRes<[]> { + let NumMicroOps = 22; +} +def : InstRW<[WriteVPGATHERQD256, ReadAfterLd], (instregex "VPGATHERQDYrm")>; + +// VPGATHERDQ. +// x. +def WriteVPGATHERDQ128 : SchedWriteRes<[]> { + let NumMicroOps = 12; +} +def : InstRW<[WriteVPGATHERDQ128, ReadAfterLd], (instregex "VPGATHERDQrm")>; + +// y. +def WriteVPGATHERDQ256 : SchedWriteRes<[]> { + let NumMicroOps = 20; +} +def : InstRW<[WriteVPGATHERDQ256, ReadAfterLd], (instregex "VPGATHERDQYrm")>; + +// VPGATHERQQ. +// x. +def WriteVPGATHERQQ128 : SchedWriteRes<[]> { + let NumMicroOps = 14; +} +def : InstRW<[WriteVPGATHERQQ128, ReadAfterLd], (instregex "VPGATHERQQrm")>; + +// y. +def WriteVPGATHERQQ256 : SchedWriteRes<[]> { + let NumMicroOps = 22; +} +def : InstRW<[WriteVPGATHERQQ256, ReadAfterLd], (instregex "VPGATHERQQYrm")>; + +//-- Arithmetic instructions --// + +//////////////////////////////////////////////////////////////////////////////// +// Horizontal add/sub instructions. +//////////////////////////////////////////////////////////////////////////////// + +// HADD, HSUB PS/PD +// x,x / v,v,v. +def : WriteRes { let Latency = 5; let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; + let ResourceCycles = [1, 2]; +} + +// x,m / v,v,m. +def : WriteRes { + let Latency = 9; + let NumMicroOps = 4; + let ResourceCycles = [1, 2, 1]; +} + +// PHADD|PHSUB (S) W/D. +// v <- v,v. +def : WriteRes { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [1, 2]; +} +// v <- v,m. +def : WriteRes { + let Latency = 6; + let NumMicroOps = 3; + let ResourceCycles = [1, 2, 1]; +} + +// PHADD|PHSUB (S) W/D. +// v <- v,v. 
+def WritePHADDSUBr : SchedWriteRes<[HWPort1, HWPort5]> { + let Latency = 3; + let NumMicroOps = 3; + let ResourceCycles = [1, 2]; } -def: InstRW<[HWWriteResGroup87], (instregex "STR32r")>; +def : InstRW<[WritePHADDSUBr], (instregex "MMX_PHADD(W?)rr64", + "MMX_PHADDSWrr64", + "MMX_PHSUB(W|D)rr64", + "MMX_PHSUBSWrr64", + "(V?)PH(ADD|SUB)(W|D)(Y?)rr", + "(V?)PH(ADD|SUB)SWrr(256)?")>; -def HWWriteResGroup88 : SchedWriteRes<[HWPort1,HWPort0,HWPort0156]> { - let Latency = 5; +// v <- v,m. +def WritePHADDSUBm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> { + let Latency = 6; let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; + let ResourceCycles = [1, 2, 1]; } -def: InstRW<[HWWriteResGroup88], (instregex "MULX32rr")>; +def : InstRW<[WritePHADDSUBm, ReadAfterLd], + (instregex "MMX_PHADD(W?)rm64", + "MMX_PHADDSWrm64", + "MMX_PHSUB(W|D)rm64", + "MMX_PHSUBSWrm64", + "(V?)PH(ADD|SUB)(W|D)(Y?)rm", + "(V?)PH(ADD|SUB)SWrm(128|256)?")>; -def HWWriteResGroup89 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> { - let Latency = 5; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[HWWriteResGroup89], (instregex "HADDPDrm")>; -def: InstRW<[HWWriteResGroup89], (instregex "HADDPSrm")>; -def: InstRW<[HWWriteResGroup89], (instregex "HSUBPDrm")>; -def: InstRW<[HWWriteResGroup89], (instregex "HSUBPSrm")>; -def: InstRW<[HWWriteResGroup89], (instregex "VHADDPDrm")>; -def: InstRW<[HWWriteResGroup89], (instregex "VHADDPDrm")>; -def: InstRW<[HWWriteResGroup89], (instregex "VHADDPSYrm")>; -def: InstRW<[HWWriteResGroup89], (instregex "VHADDPSrm")>; -def: InstRW<[HWWriteResGroup89], (instregex "VHSUBPDYrm")>; -def: InstRW<[HWWriteResGroup89], (instregex "VHSUBPDrm")>; -def: InstRW<[HWWriteResGroup89], (instregex "VHSUBPSYrm")>; -def: InstRW<[HWWriteResGroup89], (instregex "VHSUBPSrm")>; - -def HWWriteResGroup90 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> { +// PCMPGTQ. +// v <- v,v. 
+def WritePCMPGTQr : SchedWriteRes<[HWPort0]> { let Latency = 5; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; + let NumMicroOps = 1; } -def: InstRW<[HWWriteResGroup90], (instregex "CVTTSS2SI64rm")>; +def : InstRW<[WritePCMPGTQr], (instregex "(V?)PCMPGTQ(Y?)rr")>; -def HWWriteResGroup91 : SchedWriteRes<[HWPort1,HWPort23,HWPort0,HWPort0156]> { +// v <- v,m. +def WritePCMPGTQm : SchedWriteRes<[HWPort0, HWPort23]> { let Latency = 5; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; } -def: InstRW<[HWWriteResGroup91], (instregex "MULX32rm")>; +def : InstRW<[WritePCMPGTQm, ReadAfterLd], (instregex "(V?)PCMPGTQ(Y?)rm")>; -def HWWriteResGroup92 : SchedWriteRes<[HWPort6,HWPort0156]> { - let Latency = 5; - let NumMicroOps = 5; - let ResourceCycles = [1,4]; +// PMULLD. +// x,x / y,y,y. +def WritePMULLDr : SchedWriteRes<[HWPort0]> { + let Latency = 10; + let NumMicroOps = 2; + let ResourceCycles = [2]; } -def: InstRW<[HWWriteResGroup92], (instregex "PAUSE")>; +def : InstRW<[WritePMULLDr], (instregex "(V?)PMULLD(Y?)rr")>; -def HWWriteResGroup93 : SchedWriteRes<[HWPort0,HWPort0156]> { - let Latency = 5; - let NumMicroOps = 5; - let ResourceCycles = [1,4]; +// x,m / y,y,m. 
+def WritePMULLDm : SchedWriteRes<[HWPort0, HWPort23]> { + let Latency = 10; + let NumMicroOps = 3; + let ResourceCycles = [2, 1]; } -def: InstRW<[HWWriteResGroup93], (instregex "XSETBV")>; +def : InstRW<[WritePMULLDm, ReadAfterLd], (instregex "(V?)PMULLD(Y?)rm")>; -def HWWriteResGroup94 : SchedWriteRes<[HWPort0,HWPort0156]> { - let Latency = 5; - let NumMicroOps = 5; - let ResourceCycles = [2,3]; -} -def: InstRW<[HWWriteResGroup94], (instregex "CMPXCHG32rr")>; -def: InstRW<[HWWriteResGroup94], (instregex "CMPXCHG8rr")>; -def: InstRW<[HWWriteResGroup94], (instregex "ROUNDPDr")>; -def: InstRW<[HWWriteResGroup94], (instregex "ROUNDPSr")>; -def: InstRW<[HWWriteResGroup94], (instregex "ROUNDSDr")>; -def: InstRW<[HWWriteResGroup94], (instregex "ROUNDSSr")>; -def: InstRW<[HWWriteResGroup94], (instregex "VBROADCASTF128")>; -def: InstRW<[HWWriteResGroup94], (instregex "VPBROADCASTMB2QZrr")>; -def: InstRW<[HWWriteResGroup94], (instregex "VROUNDPDr")>; -def: InstRW<[HWWriteResGroup94], (instregex "VROUNDPSr")>; -def: InstRW<[HWWriteResGroup94], (instregex "VROUNDSDr")>; - -def HWWriteResGroup95 : SchedWriteRes<[HWPort1,HWPort5]> { - let Latency = 6; +//-- Logic instructions --// + +// PTEST. +// v,v. 
+def WritePTESTr : SchedWriteRes<[HWPort0, HWPort5]> { + let Latency = 2; let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup95], (instregex "VCVTDQ2PDYrr")>; -def: InstRW<[HWWriteResGroup95], (instregex "VCVTPD2DQYrr")>; -def: InstRW<[HWWriteResGroup95], (instregex "VCVTPD2PSYrr")>; -def: InstRW<[HWWriteResGroup95], (instregex "VCVTPS2PHYrr")>; -def: InstRW<[HWWriteResGroup95], (instregex "VCVTTPD2DQYrr")>; -def: InstRW<[HWWriteResGroup95], (instregex "ROUNDPDm")>; -def: InstRW<[HWWriteResGroup95], (instregex "ROUNDPSm")>; -def: InstRW<[HWWriteResGroup95], (instregex "ROUNDSDm")>; -def: InstRW<[HWWriteResGroup95], (instregex "ROUNDSSm")>; -def: InstRW<[HWWriteResGroup95], (instregex "VROUNDPDm")>; -def: InstRW<[HWWriteResGroup95], (instregex "VROUNDPDm")>; -def: InstRW<[HWWriteResGroup95], (instregex "VROUNDPSm")>; -def: InstRW<[HWWriteResGroup95], (instregex "VROUNDPSm")>; -def: InstRW<[HWWriteResGroup95], (instregex "VROUNDSDm")>; -def: InstRW<[HWWriteResGroup95], (instregex "VROUNDSSm")>; - -def HWWriteResGroup96 : SchedWriteRes<[HWPort1,HWPort5,HWPort23]> { - let Latency = 6; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; + let ResourceCycles = [1, 1]; } -def: InstRW<[HWWriteResGroup96], (instregex "VCVTDQ2PDYrm")>; +def : InstRW<[WritePTESTr], (instregex "(V?)PTEST(Y?)rr")>; -def HWWriteResGroup97 : SchedWriteRes<[HWPort1,HWPort4,HWPort5,HWPort237]> { +// v,m. +def WritePTESTm : SchedWriteRes<[HWPort0, HWPort5, HWPort23]> { let Latency = 6; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; + let NumMicroOps = 3; + let ResourceCycles = [1, 1, 1]; } -def: InstRW<[HWWriteResGroup97], (instregex "VCVTPS2PHYmr")>; +def : InstRW<[WritePTESTm], (instregex "(V?)PTEST(Y?)rm")>; -def HWWriteResGroup98 : SchedWriteRes<[HWPort1,HWPort6,HWPort0,HWPort0156]> { - let Latency = 6; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; +// PSLL,PSRL,PSRA W/D/Q. +// x,x / v,v,x. 
+def WritePShift : SchedWriteRes<[HWPort0, HWPort5]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; } -def: InstRW<[HWWriteResGroup98], (instregex "SLDT32r")>; +def : InstRW<[WritePShift], (instregex "(V?)PS(LL|RL|RA)(W|D|Q)(Y?)rr")>; -def HWWriteResGroup99 : SchedWriteRes<[HWPort6,HWPort0156]> { - let Latency = 6; - let NumMicroOps = 6; - let ResourceCycles = [1,5]; +// PSLL,PSRL DQ. +def : InstRW<[WriteP5], (instregex "(V?)PS(R|L)LDQ(Y?)ri")>; + +//-- Other --// + +// EMMS. +def WriteEMMS : SchedWriteRes<[]> { + let Latency = 13; + let NumMicroOps = 31; } -def: InstRW<[HWWriteResGroup99], (instregex "STD")>; +def : InstRW<[WriteEMMS], (instregex "MMX_EMMS")>; -def HWWriteResGroup100 : SchedWriteRes<[HWPort5]> { - let Latency = 7; - let NumMicroOps = 1; - let ResourceCycles = [1]; +//=== Floating Point XMM and YMM Instructions ===// +//-- Move instructions --// + +// MOVMSKP S/D. +// r32 <- x. +def WriteMOVMSKPr : SchedWriteRes<[HWPort0]> { + let Latency = 3; } -def: InstRW<[HWWriteResGroup100], (instregex "AESDECLASTrr")>; -def: InstRW<[HWWriteResGroup100], (instregex "AESDECrr")>; -def: InstRW<[HWWriteResGroup100], (instregex "AESENCLASTrr")>; -def: InstRW<[HWWriteResGroup100], (instregex "AESENCrr")>; -def: InstRW<[HWWriteResGroup100], (instregex "KANDQrr")>; -def: InstRW<[HWWriteResGroup100], (instregex "VAESDECLASTrr")>; -def: InstRW<[HWWriteResGroup100], (instregex "VAESDECrr")>; -def: InstRW<[HWWriteResGroup100], (instregex "VAESENCrr")>; +def : InstRW<[WriteMOVMSKPr], (instregex "(V?)MOVMSKP(S|D)rr")>; -def HWWriteResGroup101 : SchedWriteRes<[HWPort5,HWPort23]> { - let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[HWWriteResGroup101], (instregex "AESDECLASTrm")>; -def: InstRW<[HWWriteResGroup101], (instregex "AESDECrm")>; -def: InstRW<[HWWriteResGroup101], (instregex "AESENCLASTrm")>; -def: InstRW<[HWWriteResGroup101], (instregex "AESENCrm")>; -def: InstRW<[HWWriteResGroup101], (instregex 
"VAESDECLASTrm")>; -def: InstRW<[HWWriteResGroup101], (instregex "VAESDECrm")>; -def: InstRW<[HWWriteResGroup101], (instregex "VAESENCLASTrm")>; -def: InstRW<[HWWriteResGroup101], (instregex "VAESENCrm")>; - -def HWWriteResGroup102 : SchedWriteRes<[HWPort0,HWPort5]> { - let Latency = 7; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; +// r32 <- y. +def WriteVMOVMSKPYr : SchedWriteRes<[HWPort0]> { + let Latency = 2; } -def: InstRW<[HWWriteResGroup102], (instregex "MPSADBWrri")>; -def: InstRW<[HWWriteResGroup102], (instregex "VMPSADBWYrri")>; -def: InstRW<[HWWriteResGroup102], (instregex "VMPSADBWrri")>; +def : InstRW<[WriteVMOVMSKPYr], (instregex "VMOVMSKP(S|D)Yrr")>; -def HWWriteResGroup103 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> { - let Latency = 7; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; +// VPERM2F128. +def : InstRW<[WriteFShuffle256], (instregex "VPERM2F128rr")>; +def : InstRW<[WriteFShuffle256Ld, ReadAfterLd], (instregex "VPERM2F128rm")>; + +// BLENDVP S/D. +def : InstRW<[WriteFVarBlend], (instregex "BLENDVP(S|D)rr0")>; +def : InstRW<[WriteFVarBlendLd, ReadAfterLd], (instregex "BLENDVP(S|D)rm0")>; + +// VBROADCASTF128. +def : InstRW<[WriteLoad], (instregex "VBROADCASTF128")>; + +// EXTRACTPS. +// r32,x,i. +def WriteEXTRACTPSr : SchedWriteRes<[HWPort0, HWPort5]> { + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; } -def: InstRW<[HWWriteResGroup103], (instregex "MPSADBWrmi")>; -def: InstRW<[HWWriteResGroup103], (instregex "VMPSADBWYrmi")>; -def: InstRW<[HWWriteResGroup103], (instregex "VMPSADBWrmi")>; +def : InstRW<[WriteEXTRACTPSr], (instregex "(V?)EXTRACTPSrr")>; -def HWWriteResGroup104 : SchedWriteRes<[HWPort0,HWPort1,HWPort5]> { - let Latency = 9; +// m32,x,i. 
+def WriteEXTRACTPSm : SchedWriteRes<[HWPort0, HWPort5, HWPort23]> { + let Latency = 4; let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; + let ResourceCycles = [1, 1, 1]; } -def: InstRW<[HWWriteResGroup104], (instregex "DPPDrri")>; -def: InstRW<[HWWriteResGroup104], (instregex "VDPPDrri")>; +def : InstRW<[WriteEXTRACTPSm], (instregex "(V?)EXTRACTPSmr")>; -def HWWriteResGroup105 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> { - let Latency = 9; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; +// VEXTRACTF128. +// x,y,i. +def : InstRW<[WriteFShuffle256], (instregex "VEXTRACTF128rr")>; + +// m128,y,i. +def WriteVEXTRACTF128m : SchedWriteRes<[HWPort23, HWPort4]> { + let Latency = 4; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; } -def: InstRW<[HWWriteResGroup105], (instregex "DPPDrmi")>; -def: InstRW<[HWWriteResGroup105], (instregex "VDPPDrmi")>; +def : InstRW<[WriteVEXTRACTF128m], (instregex "VEXTRACTF128mr")>; -def HWWriteResGroup106 : SchedWriteRes<[HWPort0]> { - let Latency = 10; +// VINSERTF128. +// y,y,x,i. +def : InstRW<[WriteFShuffle256], (instregex "VINSERTF128rr")>; + +// y,y,m128,i. +def WriteVINSERTF128m : SchedWriteRes<[HWPort015, HWPort23]> { + let Latency = 4; let NumMicroOps = 2; - let ResourceCycles = [2]; + let ResourceCycles = [1, 1]; } -def: InstRW<[HWWriteResGroup106], (instregex "PMULLDrr")>; -def: InstRW<[HWWriteResGroup106], (instregex "VPMULLDYrr")>; -def: InstRW<[HWWriteResGroup106], (instregex "VPMULLDrr")>; +def : InstRW<[WriteVINSERTF128m, ReadAfterLd], (instregex "VINSERTF128rm")>; -def HWWriteResGroup107 : SchedWriteRes<[HWPort0,HWPort23]> { - let Latency = 10; +// VMASKMOVP S/D. +// v,v,m. 
+def WriteVMASKMOVPrm : SchedWriteRes<[HWPort5, HWPort23]> { + let Latency = 4; let NumMicroOps = 3; - let ResourceCycles = [2,1]; + let ResourceCycles = [2, 1]; } -def: InstRW<[HWWriteResGroup107], (instregex "PMULLDrm")>; -def: InstRW<[HWWriteResGroup107], (instregex "VPMULLDYrm")>; -def: InstRW<[HWWriteResGroup107], (instregex "VPMULLDrm")>; +def : InstRW<[WriteVMASKMOVPrm], (instregex "VMASKMOVP(S|D)(Y?)rm")>; -def HWWriteResGroup108 : SchedWriteRes<[HWPort0]> { - let Latency = 11; - let NumMicroOps = 1; - let ResourceCycles = [1]; +// m128,x,x. +def WriteVMASKMOVPmr : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> { + let Latency = 13; + let NumMicroOps = 4; + let ResourceCycles = [1, 1, 1, 1]; } -def: InstRW<[HWWriteResGroup108], (instregex "DIVPSrr")>; -def: InstRW<[HWWriteResGroup108], (instregex "DIVSSrr")>; +def : InstRW<[WriteVMASKMOVPmr], (instregex "VMASKMOVP(S|D)mr")>; -def HWWriteResGroup109 : SchedWriteRes<[HWPort0,HWPort23]> { - let Latency = 11; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; +// m256,y,y. +def WriteVMASKMOVPYmr : SchedWriteRes<[HWPort0, HWPort1, HWPort4, HWPort23]> { + let Latency = 14; + let NumMicroOps = 4; + let ResourceCycles = [1, 1, 1, 1]; } -def: InstRW<[HWWriteResGroup109], (instregex "DIVPSrm")>; -def: InstRW<[HWWriteResGroup109], (instregex "DIVSSrm")>; +def : InstRW<[WriteVMASKMOVPYmr], (instregex "VMASKMOVP(S|D)Ymr")>; -def HWWriteResGroup110 : SchedWriteRes<[HWPort0]> { - let Latency = 11; - let NumMicroOps = 3; - let ResourceCycles = [3]; +// VGATHERDPS. +// x. 
+def WriteVGATHERDPS128 : SchedWriteRes<[]> { + let NumMicroOps = 20; } -def: InstRW<[HWWriteResGroup110], (instregex "PCMPISTRIrr")>; -def: InstRW<[HWWriteResGroup110], (instregex "PCMPISTRM128rr")>; -def: InstRW<[HWWriteResGroup110], (instregex "VPCMPISTRIrr")>; -def: InstRW<[HWWriteResGroup110], (instregex "VPCMPISTRM128rr")>; +def : InstRW<[WriteVGATHERDPS128, ReadAfterLd], (instregex "VGATHERDPSrm")>; -def HWWriteResGroup111 : SchedWriteRes<[HWPort0,HWPort5]> { - let Latency = 11; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; +// y. +def WriteVGATHERDPS256 : SchedWriteRes<[]> { + let NumMicroOps = 34; } -def: InstRW<[HWWriteResGroup111], (instregex "PCLMULQDQrr")>; -def: InstRW<[HWWriteResGroup111], (instregex "VPCLMULQDQrr")>; +def : InstRW<[WriteVGATHERDPS256, ReadAfterLd], (instregex "VGATHERDPSYrm")>; -def HWWriteResGroup112 : SchedWriteRes<[HWPort0,HWPort015]> { - let Latency = 11; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; +// VGATHERQPS. +// x. +def WriteVGATHERQPS128 : SchedWriteRes<[]> { + let NumMicroOps = 15; } -def: InstRW<[HWWriteResGroup112], (instregex "VRCPPSYr(_Int)?")>; -def: InstRW<[HWWriteResGroup112], (instregex "VRSQRTPSYr")>; +def : InstRW<[WriteVGATHERQPS128, ReadAfterLd], (instregex "VGATHERQPSrm")>; -def HWWriteResGroup113 : SchedWriteRes<[HWPort0,HWPort23]> { - let Latency = 11; - let NumMicroOps = 4; - let ResourceCycles = [3,1]; +// y. +def WriteVGATHERQPS256 : SchedWriteRes<[]> { + let NumMicroOps = 22; } -def: InstRW<[HWWriteResGroup113], (instregex "PCMPISTRIrm")>; -def: InstRW<[HWWriteResGroup113], (instregex "PCMPISTRM128rm")>; -def: InstRW<[HWWriteResGroup113], (instregex "VPCMPISTRIrm")>; -def: InstRW<[HWWriteResGroup113], (instregex "VPCMPISTRM128rm")>; +def : InstRW<[WriteVGATHERQPS256, ReadAfterLd], (instregex "VGATHERQPSYrm")>; -def HWWriteResGroup114 : SchedWriteRes<[HWPort0,HWPort5,HWPort23]> { - let Latency = 11; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; +// VGATHERDPD. +// x. 
+def WriteVGATHERDPD128 : SchedWriteRes<[]> { + let NumMicroOps = 12; } -def: InstRW<[HWWriteResGroup114], (instregex "PCLMULQDQrm")>; -def: InstRW<[HWWriteResGroup114], (instregex "VPCLMULQDQrm")>; -def: InstRW<[HWWriteResGroup114], (instregex "VRCPPSYm(_Int)?")>; +def : InstRW<[WriteVGATHERDPD128, ReadAfterLd], (instregex "VGATHERDPDrm")>; -def HWWriteResGroup115 : SchedWriteRes<[HWPort0,HWPort23,HWPort015]> { - let Latency = 11; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; +// y. +def WriteVGATHERDPD256 : SchedWriteRes<[]> { + let NumMicroOps = 20; } -def: InstRW<[HWWriteResGroup115], (instregex "VRCPPSm")>; -def: InstRW<[HWWriteResGroup115], (instregex "VRSQRTPSYm")>; +def : InstRW<[WriteVGATHERDPD256, ReadAfterLd], (instregex "VGATHERDPDYrm")>; -def HWWriteResGroup116 : SchedWriteRes<[HWPort4,HWPort23,HWPort237,HWPort0,HWPort15,HWPort0156]> { - let Latency = 11; +// VGATHERQPD. +// x. +def WriteVGATHERQPD128 : SchedWriteRes<[]> { let NumMicroOps = 14; - let ResourceCycles = [1,1,1,4,2,5]; } -def: InstRW<[HWWriteResGroup116], (instregex "CMPXCHG8B")>; +def : InstRW<[WriteVGATHERQPD128, ReadAfterLd], (instregex "VGATHERQPDrm")>; -def HWWriteResGroup117 : SchedWriteRes<[HWPort0]> { - let Latency = 13; - let NumMicroOps = 1; - let ResourceCycles = [1]; +// y. +def WriteVGATHERQPD256 : SchedWriteRes<[]> { + let NumMicroOps = 22; } -def: InstRW<[HWWriteResGroup117], (instregex "SQRTPSr")>; -def: InstRW<[HWWriteResGroup117], (instregex "SQRTSSr")>; -def: InstRW<[HWWriteResGroup117], (instregex "VDIVPSrr")>; -def: InstRW<[HWWriteResGroup117], (instregex "VDIVSSrr")>; +def : InstRW<[WriteVGATHERQPD256, ReadAfterLd], (instregex "VGATHERQPDYrm")>; -def HWWriteResGroup118 : SchedWriteRes<[HWPort0,HWPort23]> { - let Latency = 13; +//-- Conversion instructions --// + +// CVTPD2PS. +// x,x. +def : InstRW<[WriteP1_P5_Lat4], (instregex "(V?)CVTPD2PSrr")>; + +// x,m128. +def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(V?)CVTPD2PS(X?)rm")>; + +// x,y. 
+def WriteCVTPD2PSYrr : SchedWriteRes<[HWPort1, HWPort5]> { + let Latency = 5; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1, 1]; } -def: InstRW<[HWWriteResGroup118], (instregex "SQRTPSm")>; -def: InstRW<[HWWriteResGroup118], (instregex "SQRTSSm")>; -def: InstRW<[HWWriteResGroup118], (instregex "VDIVPSrm")>; -def: InstRW<[HWWriteResGroup118], (instregex "VDIVSSrm")>; +def : InstRW<[WriteCVTPD2PSYrr], (instregex "(V?)CVTPD2PSYrr")>; -def HWWriteResGroup119 : SchedWriteRes<[HWPort0]> { - let Latency = 14; - let NumMicroOps = 1; - let ResourceCycles = [1]; +// x,m256. +def WriteCVTPD2PSYrm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> { + let Latency = 9; + let NumMicroOps = 3; + let ResourceCycles = [1, 1, 1]; } -def: InstRW<[HWWriteResGroup119], (instregex "DIVPDrr")>; -def: InstRW<[HWWriteResGroup119], (instregex "DIVSDrr")>; -def: InstRW<[HWWriteResGroup119], (instregex "VSQRTPSr")>; -def: InstRW<[HWWriteResGroup119], (instregex "VSQRTSSr")>; +def : InstRW<[WriteCVTPD2PSYrm], (instregex "(V?)CVTPD2PSYrm")>; -def HWWriteResGroup120 : SchedWriteRes<[HWPort5]> { - let Latency = 14; - let NumMicroOps = 2; - let ResourceCycles = [2]; -} -def: InstRW<[HWWriteResGroup120], (instregex "AESIMCrr")>; -def: InstRW<[HWWriteResGroup120], (instregex "VAESIMCrr")>; +// CVTSD2SS. +// x,x. +def : InstRW<[WriteP1_P5_Lat4], (instregex "(Int_)?(V)?CVTSD2SSrr")>; -def HWWriteResGroup121 : SchedWriteRes<[HWPort0,HWPort23]> { - let Latency = 14; +// x,m64. +def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(Int_)?(V)?CVTSD2SSrm")>; + +// CVTPS2PD. +// x,x. 
+def WriteCVTPS2PDrr : SchedWriteRes<[HWPort0, HWPort5]> { + let Latency = 2; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [1, 1]; } -def: InstRW<[HWWriteResGroup121], (instregex "DIVPDrm")>; -def: InstRW<[HWWriteResGroup121], (instregex "DIVSDrm")>; -def: InstRW<[HWWriteResGroup121], (instregex "VSQRTPSm")>; -def: InstRW<[HWWriteResGroup121], (instregex "VSQRTSSm")>; +def : InstRW<[WriteCVTPS2PDrr], (instregex "(V?)CVTPS2PDrr")>; -def HWWriteResGroup122 : SchedWriteRes<[HWPort5,HWPort23]> { - let Latency = 14; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; +// x,m64. +// y,m128. +def WriteCVTPS2PDrm : SchedWriteRes<[HWPort0, HWPort23]> { + let Latency = 5; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; } -def: InstRW<[HWWriteResGroup122], (instregex "AESIMCrm")>; -def: InstRW<[HWWriteResGroup122], (instregex "VAESIMCrm")>; +def : InstRW<[WriteCVTPS2PDrm], (instregex "(V?)CVTPS2PD(Y?)rm")>; -def HWWriteResGroup123 : SchedWriteRes<[HWPort0,HWPort1,HWPort5]> { - let Latency = 14; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; +// y,x. +def WriteVCVTPS2PDYrr : SchedWriteRes<[HWPort0, HWPort5]> { + let Latency = 5; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; } -def: InstRW<[HWWriteResGroup123], (instregex "DPPSrri")>; -def: InstRW<[HWWriteResGroup123], (instregex "VDPPSYrri")>; -def: InstRW<[HWWriteResGroup123], (instregex "VDPPSrri")>; +def : InstRW<[WriteVCVTPS2PDYrr], (instregex "VCVTPS2PDYrr")>; -def HWWriteResGroup124 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort23]> { - let Latency = 14; - let NumMicroOps = 5; - let ResourceCycles = [2,1,1,1]; +// CVTSS2SD. +// x,x. 
+def WriteCVTSS2SDrr : SchedWriteRes<[HWPort0, HWPort5]> { + let Latency = 2; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; } -def: InstRW<[HWWriteResGroup124], (instregex "DPPSrmi")>; -def: InstRW<[HWWriteResGroup124], (instregex "VDPPSYrmi")>; -def: InstRW<[HWWriteResGroup124], (instregex "VDPPSrmi")>; +def : InstRW<[WriteCVTSS2SDrr], (instregex "(Int_)?(V?)CVTSS2SDrr")>; -def HWWriteResGroup125 : SchedWriteRes<[HWPort23,HWPort0156]> { - let Latency = 14; - let NumMicroOps = 15; - let ResourceCycles = [1,14]; +// x,m32. +def WriteCVTSS2SDrm : SchedWriteRes<[HWPort0, HWPort23]> { + let Latency = 5; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; } -def: InstRW<[HWWriteResGroup125], (instregex "POPF16")>; +def : InstRW<[WriteCVTSS2SDrm], (instregex "(Int_)?(V?)CVTSS2SDrm")>; + +// CVTDQ2PD. +// x,x. +def : InstRW<[WriteP1_P5_Lat4], (instregex "(V)?CVTDQ2PDrr")>; + +// y,x. +def : InstRW<[WriteP1_P5_Lat6], (instregex "VCVTDQ2PDYrr")>; + +// CVT(T)PD2DQ. +// x,x. +def : InstRW<[WriteP1_P5_Lat4], (instregex "(V?)CVT(T?)PD2DQrr")>; +// x,m128. +def : InstRW<[WriteP1_P5_Lat4Ld], (instregex "(V?)CVT(T?)PD2DQrm")>; +// x,y. +def : InstRW<[WriteP1_P5_Lat6], (instregex "VCVT(T?)PD2DQYrr")>; +// x,m256. +def : InstRW<[WriteP1_P5_Lat6Ld], (instregex "VCVT(T?)PD2DQYrm")>; + +// CVT(T)PS2PI. +// mm,x. +def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PS2PIirr")>; + +// CVTPI2PD. +// x,mm. +def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PI2PDirr")>; + +// CVT(T)PD2PI. +// mm,x. +def : InstRW<[WriteP1_P5_Lat4], (instregex "MMX_CVT(T?)PD2PIirr")>; + +// CVSTSI2SS. +// x,r32. +def : InstRW<[WriteP1_P5_Lat4], (instregex "(Int_)?(V?)CVT(T?)SI2SS(64)?rr")>; + +// CVT(T)SS2SI. +// r32,x. +def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVT(T?)SS2SI(64)?rr")>; +// r32,m32. +def : InstRW<[WriteP0_P1_Lat4Ld], (instregex "(Int_)?(V?)CVT(T?)SS2SI(64)?rm")>; + +// CVTSI2SD. +// x,r32/64. 
+def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVTSI2SD(64)?rr")>; + +// CVTSD2SI. +// r32/64 +def : InstRW<[WriteP0_P1_Lat4], (instregex "(Int_)?(V?)CVT(T?)SD2SI(64)?rr")>; +// r32,m32. +def : InstRW<[WriteP0_P1_Lat4Ld], (instregex "(Int_)?(V?)CVT(T?)SD2SI(64)?rm")>; + +// VCVTPS2PH. +// x,v,i. +def : InstRW<[WriteP1_P5_Lat4], (instregex "VCVTPS2PH(Y?)rr")>; +// m,v,i. +def : InstRW<[WriteP1_P5_Lat4Ld, WriteRMW], (instregex "VCVTPS2PH(Y?)mr")>; + +// VCVTPH2PS. +// v,x. +def : InstRW<[WriteP1_P5_Lat4], (instregex "VCVTPH2PS(Y?)rr")>; + +//-- Arithmetic instructions --// -def HWWriteResGroup126 : SchedWriteRes<[HWPort4,HWPort5,HWPort6,HWPort23,HWPort237,HWPort0,HWPort0156]> { - let Latency = 15; - let NumMicroOps = 8; - let ResourceCycles = [1,1,1,1,1,1,2]; +// HADD, HSUB PS/PD +// x,x / v,v,v. +def WriteHADDSUBPr : SchedWriteRes<[HWPort1, HWPort5]> { + let Latency = 5; + let NumMicroOps = 3; + let ResourceCycles = [1, 2]; } -def: InstRW<[HWWriteResGroup126], (instregex "INSB")>; -def: InstRW<[HWWriteResGroup126], (instregex "INSL")>; -def: InstRW<[HWWriteResGroup126], (instregex "INSW")>; +def : InstRW<[WriteHADDSUBPr], (instregex "(V?)H(ADD|SUB)P(S|D)(Y?)rr")>; -def HWWriteResGroup127 : SchedWriteRes<[HWPort5]> { - let Latency = 16; - let NumMicroOps = 16; - let ResourceCycles = [16]; +// x,m / v,v,m. +def WriteHADDSUBPm : SchedWriteRes<[HWPort1, HWPort5, HWPort23]> { + let Latency = 9; + let NumMicroOps = 4; + let ResourceCycles = [1, 2, 1]; } -def: InstRW<[HWWriteResGroup127], (instregex "VZEROALL")>; +def : InstRW<[WriteHADDSUBPm], (instregex "(V?)H(ADD|SUB)P(S|D)(Y?)rm")>; -def HWWriteResGroup128 : SchedWriteRes<[HWPort0,HWPort4,HWPort5,HWPort23,HWPort237,HWPort0,HWPort0156]> { - let Latency = 16; - let NumMicroOps = 19; - let ResourceCycles = [2,1,4,1,1,4,6]; +// MUL SS/SD PS/PD. +// x,x / v,v,v. 
+def WriteMULr : SchedWriteRes<[HWPort01]> { + let Latency = 5; } -def: InstRW<[HWWriteResGroup128], (instregex "CMPXCHG16B")>; +def : InstRW<[WriteMULr], (instregex "(V?)MUL(P|S)(S|D)rr")>; -def HWWriteResGroup129 : SchedWriteRes<[HWPort0,HWPort5,HWPort0156]> { - let Latency = 18; - let NumMicroOps = 8; - let ResourceCycles = [4,3,1]; +// x,m / v,v,m. +def WriteMULm : SchedWriteRes<[HWPort01, HWPort23]> { + let Latency = 9; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; } -def: InstRW<[HWWriteResGroup129], (instregex "PCMPESTRIrr")>; -def: InstRW<[HWWriteResGroup129], (instregex "VPCMPESTRIrr")>; +def : InstRW<[WriteMULm], (instregex "(V?)MUL(P|S)(S|D)rm")>; -def HWWriteResGroup130 : SchedWriteRes<[HWPort5,HWPort6,HWPort0,HWPort0156]> { - let Latency = 18; - let NumMicroOps = 8; - let ResourceCycles = [1,1,1,5]; +// VDIVPS. +// y,y,y. +def WriteVDIVPSYrr : SchedWriteRes<[HWPort0, HWPort15]> { + let Latency = 19; // 18-21 cycles. + let NumMicroOps = 3; + let ResourceCycles = [2, 1]; } -def: InstRW<[HWWriteResGroup130], (instregex "CPUID")>; -def: InstRW<[HWWriteResGroup130], (instregex "RDTSC")>; +def : InstRW<[WriteVDIVPSYrr], (instregex "VDIVPSYrr")>; -def HWWriteResGroup131 : SchedWriteRes<[HWPort0,HWPort5,HWPort23,HWPort0156]> { - let Latency = 18; - let NumMicroOps = 9; - let ResourceCycles = [4,3,1,1]; +// y,y,m256. +def WriteVDIVPSYrm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> { + let Latency = 23; // 18-21 + 4 cycles. + let NumMicroOps = 4; + let ResourceCycles = [2, 1, 1]; } -def: InstRW<[HWWriteResGroup131], (instregex "PCMPESTRIrm")>; -def: InstRW<[HWWriteResGroup131], (instregex "VPCMPESTRIrm")>; +def : InstRW<[WriteVDIVPSYrm, ReadAfterLd], (instregex "VDIVPSYrm")>; -def HWWriteResGroup132 : SchedWriteRes<[HWPort6,HWPort23,HWPort0156]> { - let Latency = 18; - let NumMicroOps = 19; - let ResourceCycles = [3,1,15]; +// VDIVPD. +// y,y,y. +def WriteVDIVPDYrr : SchedWriteRes<[HWPort0, HWPort15]> { + let Latency = 27; // 19-35 cycles. 
+ let NumMicroOps = 3; + let ResourceCycles = [2, 1]; } -def: InstRW<[HWWriteResGroup132], (instregex "XRSTOR")>; +def : InstRW<[WriteVDIVPDYrr], (instregex "VDIVPDYrr")>; -def HWWriteResGroup133 : SchedWriteRes<[HWPort0,HWPort5,HWPort015,HWPort0156]> { - let Latency = 19; - let NumMicroOps = 9; - let ResourceCycles = [4,3,1,1]; +// y,y,m256. +def WriteVDIVPDYrm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> { + let Latency = 31; // 19-35 + 4 cycles. + let NumMicroOps = 4; + let ResourceCycles = [2, 1, 1]; } -def: InstRW<[HWWriteResGroup133], (instregex "PCMPESTRM128rr")>; -def: InstRW<[HWWriteResGroup133], (instregex "VPCMPESTRM128rr")>; +def : InstRW<[WriteVDIVPDYrm, ReadAfterLd], (instregex "VDIVPDYrm")>; -def HWWriteResGroup134 : SchedWriteRes<[HWPort0,HWPort5,HWPort23,HWPort015,HWPort0156]> { - let Latency = 19; - let NumMicroOps = 10; - let ResourceCycles = [4,3,1,1,1]; -} -def: InstRW<[HWWriteResGroup134], (instregex "PCMPESTRM128rm")>; -def: InstRW<[HWWriteResGroup134], (instregex "VPCMPESTRM128rm")>; -def: InstRW<[HWWriteResGroup134], (instregex "SQRTPDr")>; -def: InstRW<[HWWriteResGroup134], (instregex "SQRTSDr")>; -def: InstRW<[HWWriteResGroup134], (instregex "VDIVPDrr")>; -def: InstRW<[HWWriteResGroup134], (instregex "VDIVSDrr")>; -def: InstRW<[HWWriteResGroup134], (instregex "SQRTPDm")>; -def: InstRW<[HWWriteResGroup134], (instregex "SQRTSDm")>; -def: InstRW<[HWWriteResGroup134], (instregex "VDIVPDrm")>; -def: InstRW<[HWWriteResGroup134], (instregex "VDIVSDrm")>; - -def HWWriteResGroup135 : SchedWriteRes<[HWPort5,HWPort6,HWPort0156]> { - let Latency = 20; - let NumMicroOps = 10; - let ResourceCycles = [1,2,7]; +// VRCPPS. +// y,y. 
+def WriteVRCPPSr : SchedWriteRes<[HWPort0, HWPort15]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [2, 1]; } -def: InstRW<[HWWriteResGroup135], (instregex "MWAITrr")>; +def : InstRW<[WriteVRCPPSr], (instregex "VRCPPSYr(_Int)?")>; -def HWWriteResGroup136 : SchedWriteRes<[HWPort0]> { - let Latency = 21; - let NumMicroOps = 1; - let ResourceCycles = [1]; +// y,m256. +def WriteVRCPPSm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> { + let Latency = 11; + let NumMicroOps = 4; + let ResourceCycles = [2, 1, 1]; } -def: InstRW<[HWWriteResGroup136], (instregex "VSQRTPDr")>; -def: InstRW<[HWWriteResGroup136], (instregex "VSQRTSDr")>; +def : InstRW<[WriteVRCPPSm], (instregex "VRCPPSYm(_Int)?")>; -def HWWriteResGroup137 : SchedWriteRes<[HWPort0,HWPort23]> { - let Latency = 21; +// ROUND SS/SD PS/PD. +// v,v,i. +def WriteROUNDr : SchedWriteRes<[HWPort1]> { + let Latency = 6; let NumMicroOps = 2; - let ResourceCycles = [1,1]; + let ResourceCycles = [2]; } -def: InstRW<[HWWriteResGroup137], (instregex "VSQRTPDm")>; -def: InstRW<[HWWriteResGroup137], (instregex "VSQRTSDm")>; +def : InstRW<[WriteROUNDr], (instregex "(V?)ROUND(Y?)(S|P)(S|D)r(_Int)?")>; -def HWWriteResGroup138 : SchedWriteRes<[HWPort0,HWPort015]> { - let Latency = 21; +// v,m,i. +def WriteROUNDm : SchedWriteRes<[HWPort1, HWPort23]> { + let Latency = 10; let NumMicroOps = 3; - let ResourceCycles = [2,1]; + let ResourceCycles = [2, 1]; } -def: InstRW<[HWWriteResGroup138], (instregex "VDIVPSYrr")>; -def: InstRW<[HWWriteResGroup138], (instregex "VSQRTPSYr")>; +def : InstRW<[WriteROUNDm], (instregex "(V?)ROUND(Y?)(S|P)(S|D)m(_Int)?")>; -def HWWriteResGroup139 : SchedWriteRes<[HWPort0,HWPort23,HWPort015]> { - let Latency = 21; +// DPPS. +// x,x,i / v,v,v,i. 
+def WriteDPPSr : SchedWriteRes<[HWPort0, HWPort1, HWPort5]> { + let Latency = 14; let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; + let ResourceCycles = [2, 1, 1]; } -def: InstRW<[HWWriteResGroup139], (instregex "VDIVPSYrm")>; -def: InstRW<[HWWriteResGroup139], (instregex "VSQRTPSYm")>; +def : InstRW<[WriteDPPSr], (instregex "(V?)DPPS(Y?)rri")>; -def HWWriteResGroup140 : SchedWriteRes<[HWPort4,HWPort6,HWPort23,HWPort237,HWPort0156]> { - let Latency = 24; - let NumMicroOps = 27; - let ResourceCycles = [1,5,1,1,19]; +// x,m,i / v,v,m,i. +def WriteDPPSm : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort23, HWPort6]> { + let Latency = 18; + let NumMicroOps = 6; + let ResourceCycles = [2, 1, 1, 1, 1]; } -def: InstRW<[HWWriteResGroup140], (instregex "XSAVE64")>; +def : InstRW<[WriteDPPSm, ReadAfterLd], (instregex "(V?)DPPS(Y?)rmi")>; -def HWWriteResGroup141 : SchedWriteRes<[HWPort4,HWPort6,HWPort23,HWPort237,HWPort0156]> { - let Latency = 25; - let NumMicroOps = 28; - let ResourceCycles = [1,6,1,1,19]; +// DPPD. +// x,x,i. +def WriteDPPDr : SchedWriteRes<[HWPort0, HWPort1, HWPort5]> { + let Latency = 9; + let NumMicroOps = 3; + let ResourceCycles = [1, 1, 1]; } -def: InstRW<[HWWriteResGroup141], (instregex "XSAVE")>; +def : InstRW<[WriteDPPDr], (instregex "(V?)DPPDrri")>; -def HWWriteResGroup142 : SchedWriteRes<[HWPort0,HWPort5,HWPort23,HWPort015]> { - let Latency = 28; - let NumMicroOps = 11; - let ResourceCycles = [2,7,1,1]; +// x,m,i. +def WriteDPPDm : SchedWriteRes<[HWPort0, HWPort1, HWPort5, HWPort23]> { + let Latency = 13; + let NumMicroOps = 4; + let ResourceCycles = [1, 1, 1, 1]; } -def: InstRW<[HWWriteResGroup142], (instregex "AESKEYGENASSIST128rm")>; -def: InstRW<[HWWriteResGroup142], (instregex "VAESKEYGENASSIST128rm")>; +def : InstRW<[WriteDPPDm], (instregex "(V?)DPPDrmi")>; -def HWWriteResGroup143 : SchedWriteRes<[HWPort0,HWPort5,HWPort015]> { - let Latency = 29; - let NumMicroOps = 11; - let ResourceCycles = [2,7,2]; +// VFMADD. +// v,v,v. 
+def WriteFMADDr : SchedWriteRes<[HWPort01]> { + let Latency = 5; + let NumMicroOps = 1; +} +def : InstRW<[WriteFMADDr], + (instregex + // 3p forms. + "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)r(Y)?", + // 3s forms. + "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)r", + // 4s/4s_int forms. + "VF(N?)M(ADD|SUB)S(S|D)4rr(_REV|_Int)?", + // 4p forms. + "VF(N?)M(ADD|SUB)P(S|D)4rr(Y)?(_REV)?")>; + +// v,v,m. +def WriteFMADDm : SchedWriteRes<[HWPort01, HWPort23]> { + let Latency = 9; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; +} +def : InstRW<[WriteFMADDm], + (instregex + // 3p forms. + "VF(N?)M(ADD|SUB|ADDSUB|SUBADD)P(S|D)(r213|r132|r231)m(Y)?", + // 3s forms. + "VF(N?)M(ADD|SUB)S(S|D)(r132|r231|r213)m", + // 4s/4s_int forms. + "VF(N?)M(ADD|SUB)S(S|D)4(rm|mr)(_Int)?", + // 4p forms. + "VF(N?)M(ADD|SUB)P(S|D)4(rm|mr)(Y)?")>; + +//-- Math instructions --// + +// VSQRTPS. +// y,y. +def WriteVSQRTPSYr : SchedWriteRes<[HWPort0, HWPort15]> { + let Latency = 19; + let NumMicroOps = 3; + let ResourceCycles = [2, 1]; } -def: InstRW<[HWWriteResGroup143], (instregex "AESKEYGENASSIST128rr")>; -def: InstRW<[HWWriteResGroup143], (instregex "VAESKEYGENASSIST128rr")>; +def : InstRW<[WriteVSQRTPSYr], (instregex "VSQRTPSYr")>; -def HWWriteResGroup145 : SchedWriteRes<[HWPort01,HWPort15,HWPort015,HWPort0156]> { - let Latency = 31; - let NumMicroOps = 31; - let ResourceCycles = [8,1,21,1]; +// y,m256. +def WriteVSQRTPSYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> { + let Latency = 23; + let NumMicroOps = 4; + let ResourceCycles = [2, 1, 1]; } -def: InstRW<[HWWriteResGroup145], (instregex "MMX_EMMS")>; +def : InstRW<[WriteVSQRTPSYm], (instregex "VSQRTPSYm")>; -def HWWriteResGroup146 : SchedWriteRes<[HWPort0,HWPort015]> { - let Latency = 35; +// VSQRTPD. +// y,y. 
+def WriteVSQRTPDYr : SchedWriteRes<[HWPort0, HWPort15]> { + let Latency = 28; let NumMicroOps = 3; - let ResourceCycles = [2,1]; + let ResourceCycles = [2, 1]; } -def: InstRW<[HWWriteResGroup146], (instregex "VDIVPDYrr")>; -def: InstRW<[HWWriteResGroup146], (instregex "VSQRTPDYr")>; +def : InstRW<[WriteVSQRTPDYr], (instregex "VSQRTPDYr")>; -def HWWriteResGroup147 : SchedWriteRes<[HWPort0,HWPort23,HWPort015]> { - let Latency = 35; +// y,m256. +def WriteVSQRTPDYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> { + let Latency = 32; let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; + let ResourceCycles = [2, 1, 1]; } -def: InstRW<[HWWriteResGroup147], (instregex "VDIVPDYrm")>; -def: InstRW<[HWWriteResGroup147], (instregex "VSQRTPDYm")>; +def : InstRW<[WriteVSQRTPDYm], (instregex "VSQRTPDYm")>; -def HWWriteResGroup148 : SchedWriteRes<[HWPort1,HWPort4,HWPort5,HWPort6,HWPort23,HWPort237,HWPort15,HWPort0156]> { - let Latency = 35; - let NumMicroOps = 18; - let ResourceCycles = [1,1,2,3,1,1,1,8]; +// RSQRT SS/PS. +// x,x. +def WriteRSQRTr : SchedWriteRes<[HWPort0]> { + let Latency = 5; } -def: InstRW<[HWWriteResGroup148], (instregex "VMCLEARm")>; +def : InstRW<[WriteRSQRTr], (instregex "(V?)RSQRT(SS|PS)r(_Int)?")>; -def HWWriteResGroup149 : SchedWriteRes<[HWPort5,HWPort0156]> { - let Latency = 42; - let NumMicroOps = 22; - let ResourceCycles = [2,20]; +// x,m128. +def WriteRSQRTm : SchedWriteRes<[HWPort0, HWPort23]> { + let Latency = 9; + let NumMicroOps = 2; + let ResourceCycles = [1, 1]; } -def: InstRW<[HWWriteResGroup149], (instregex "RDTSCP")>; +def : InstRW<[WriteRSQRTm], (instregex "(V?)RSQRT(SS|PS)m(_Int)?")>; -def HWWriteResGroup150 : SchedWriteRes<[HWPort0,HWPort01,HWPort23,HWPort0,HWPort0,HWPort015,HWPort0156]> { - let Latency = 56; - let NumMicroOps = 64; - let ResourceCycles = [2,2,8,1,10,2,39]; +// RSQRTPS 256. +// y,y. 
+def WriteRSQRTPSYr : SchedWriteRes<[HWPort0, HWPort15]> { + let Latency = 7; + let NumMicroOps = 3; + let ResourceCycles = [2, 1]; } -def: InstRW<[HWWriteResGroup150], (instregex "FLDENVm")>; -def: InstRW<[HWWriteResGroup150], (instregex "FLDENVm")>; +def : InstRW<[WriteRSQRTPSYr], (instregex "VRSQRTPSYr(_Int)?")>; -def HWWriteResGroup151 : SchedWriteRes<[HWPort0,HWPort6,HWPort23,HWPort0,HWPort0,HWPort15,HWPort0156]> { - let Latency = 59; - let NumMicroOps = 88; - let ResourceCycles = [4,4,31,1,2,1,45]; +// y,m256. +def WriteRSQRTPSYm : SchedWriteRes<[HWPort0, HWPort15, HWPort23]> { + let Latency = 11; + let NumMicroOps = 4; + let ResourceCycles = [2, 1, 1]; } -def: InstRW<[HWWriteResGroup151], (instregex "FXRSTOR64")>; +def : InstRW<[WriteRSQRTPSYm], (instregex "VRSQRTPSYm(_Int)?")>; -def HWWriteResGroup152 : SchedWriteRes<[HWPort0,HWPort6,HWPort23,HWPort0,HWPort0,HWPort15,HWPort0156]> { - let Latency = 59; - let NumMicroOps = 90; - let ResourceCycles = [4,2,33,1,2,1,47]; -} -def: InstRW<[HWWriteResGroup152], (instregex "FXRSTOR")>; +//-- Logic instructions --// -def HWWriteResGroup153 : SchedWriteRes<[HWPort5,HWPort01,HWPort0156]> { - let Latency = 75; - let NumMicroOps = 15; - let ResourceCycles = [6,3,6]; +// AND, ANDN, OR, XOR PS/PD. +// x,x / v,v,v. +def : InstRW<[WriteP5], (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rr")>; +// x,m / v,v,m. +def : InstRW<[WriteP5Ld, ReadAfterLd], + (instregex "(V?)(AND|ANDN|OR|XOR)P(S|D)(Y?)rm")>; + +//-- Other instructions --// + +// VZEROUPPER. +def WriteVZEROUPPER : SchedWriteRes<[]> { + let NumMicroOps = 4; } -def: InstRW<[HWWriteResGroup153], (instregex "FNINIT")>; +def : InstRW<[WriteVZEROUPPER], (instregex "VZEROUPPER")>; -def HWWriteResGroup154 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort6,HWPort01,HWPort0156]> { - let Latency = 98; - let NumMicroOps = 32; - let ResourceCycles = [7,7,3,3,1,11]; +// VZEROALL. 
+def WriteVZEROALL : SchedWriteRes<[]> { + let NumMicroOps = 12; } -def: InstRW<[HWWriteResGroup154], (instregex "DIV64r")>; +def : InstRW<[WriteVZEROALL], (instregex "VZEROALL")>; -def HWWriteResGroup155 : SchedWriteRes<[HWPort0,HWPort1,HWPort5,HWPort6,HWPort0,HWPort0156]> { - let Latency = 112; - let NumMicroOps = 66; - let ResourceCycles = [4,2,4,8,14,34]; +// LDMXCSR. +def WriteLDMXCSR : SchedWriteRes<[HWPort0, HWPort6, HWPort23]> { + let Latency = 6; + let NumMicroOps = 3; + let ResourceCycles = [1, 1, 1]; } -def: InstRW<[HWWriteResGroup155], (instregex "IDIV64r")>; +def : InstRW<[WriteLDMXCSR], (instregex "(V)?LDMXCSR")>; -def HWWriteResGroup156 : SchedWriteRes<[HWPort0,HWPort1,HWPort4,HWPort5,HWPort6,HWPort237,HWPort0,HWPort0156]> { - let Latency = 114; - let NumMicroOps = 100; - let ResourceCycles = [9,9,11,8,1,11,21,30]; +// STMXCSR. +def WriteSTMXCSR : SchedWriteRes<[HWPort0, HWPort4, HWPort6, HWPort237]> { + let Latency = 7; + let NumMicroOps = 4; + let ResourceCycles = [1, 1, 1, 1]; } -def: InstRW<[HWWriteResGroup156], (instregex "FSTENVm")>; -def: InstRW<[HWWriteResGroup156], (instregex "FSTENVm")>; +def : InstRW<[WriteSTMXCSR], (instregex "(V)?STMXCSR")>; } // SchedModel diff --git a/lib/Target/X86/X86SchedSandyBridge.td b/lib/Target/X86/X86SchedSandyBridge.td index c408f72c1ba..b8ec5883152 100644 --- a/lib/Target/X86/X86SchedSandyBridge.td +++ b/lib/Target/X86/X86SchedSandyBridge.td @@ -24,8 +24,8 @@ def SandyBridgeModel : SchedMachineModel { // Based on the LSD (loop-stream detector) queue size. let LoopMicroOpBufferSize = 28; - // This flag is set to allow the scheduler to assign - // a default model to unrecognized opcodes. + // FIXME: SSE4 and AVX are unimplemented. This flag is set to allow + // the scheduler to assign a default model to unrecognized opcodes. let CompleteModel = 0; } @@ -48,7 +48,6 @@ def SBPort23 : ProcResource<2>; def SBPort4 : ProcResource<1>; // Many micro-ops are capable of issuing on multiple ports. 
-def SBPort01 : ProcResGroup<[SBPort0, SBPort1]>; def SBPort05 : ProcResGroup<[SBPort0, SBPort5]>; def SBPort15 : ProcResGroup<[SBPort1, SBPort5]>; def SBPort015 : ProcResGroup<[SBPort0, SBPort1, SBPort5]>; @@ -158,6 +157,31 @@ def : WriteRes { let ResourceCycles = [1, 1, 1, 1]; } +//////////////////////////////////////////////////////////////////////////////// +// Horizontal add/sub instructions. +//////////////////////////////////////////////////////////////////////////////// +// HADD, HSUB PS/PD +// x,x / v,v,v. +def : WriteRes { + let Latency = 3; +} + +// x,m / v,v,m. +def : WriteRes { + let Latency = 7; + let ResourceCycles = [1, 1]; +} + +// PHADD|PHSUB (S) W/D. +// v <- v,v. +def : WriteRes; + +// v <- v,m. +def : WriteRes { + let Latency = 5; + let ResourceCycles = [1, 1]; +} + // String instructions. // Packed Compare Implicit Length Strings, Return Mask def : WriteRes { @@ -248,2282 +272,4 @@ def : WriteRes; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; - -//////////////////////////////////////////////////////////////////////////////// -// Horizontal add/sub instructions. -//////////////////////////////////////////////////////////////////////////////// -// HADD, HSUB PS/PD -// x,x / v,v,v. -def : WriteRes { - let Latency = 3; -} - -// x,m / v,v,m. -def : WriteRes { - let Latency = 7; - let ResourceCycles = [1, 1]; -} - -// PHADD|PHSUB (S) W/D. -// v <- v,v. -def : WriteRes; - -// v <- v,m. -def : WriteRes { - let Latency = 5; - let ResourceCycles = [1, 1]; -} - -// Remaining SNB instrs. 
- -def SBWriteResGroup0 : SchedWriteRes<[SBPort0]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup0], (instregex "CVTSS2SDrr")>; -def: InstRW<[SBWriteResGroup0], (instregex "PSLLDri")>; -def: InstRW<[SBWriteResGroup0], (instregex "PSLLQri")>; -def: InstRW<[SBWriteResGroup0], (instregex "PSLLWri")>; -def: InstRW<[SBWriteResGroup0], (instregex "PSRADri")>; -def: InstRW<[SBWriteResGroup0], (instregex "PSRAWri")>; -def: InstRW<[SBWriteResGroup0], (instregex "PSRLDri")>; -def: InstRW<[SBWriteResGroup0], (instregex "PSRLQri")>; -def: InstRW<[SBWriteResGroup0], (instregex "PSRLWri")>; -def: InstRW<[SBWriteResGroup0], (instregex "VCVTSS2SDrr")>; -def: InstRW<[SBWriteResGroup0], (instregex "VPMOVMSKBrr")>; -def: InstRW<[SBWriteResGroup0], (instregex "VPSLLDri")>; -def: InstRW<[SBWriteResGroup0], (instregex "VPSLLQri")>; -def: InstRW<[SBWriteResGroup0], (instregex "VPSLLWri")>; -def: InstRW<[SBWriteResGroup0], (instregex "VPSRADri")>; -def: InstRW<[SBWriteResGroup0], (instregex "VPSRAWri")>; -def: InstRW<[SBWriteResGroup0], (instregex "VPSRLDri")>; -def: InstRW<[SBWriteResGroup0], (instregex "VPSRLQri")>; -def: InstRW<[SBWriteResGroup0], (instregex "VPSRLWri")>; -def: InstRW<[SBWriteResGroup0], (instregex "VTESTPDYrr")>; -def: InstRW<[SBWriteResGroup0], (instregex "VTESTPDrr")>; -def: InstRW<[SBWriteResGroup0], (instregex "VTESTPSYrr")>; -def: InstRW<[SBWriteResGroup0], (instregex "VTESTPSrr")>; - -def SBWriteResGroup1 : SchedWriteRes<[SBPort5]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup1], (instregex "ANDNPDrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "ANDNPSrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "ANDPDrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "ANDPSrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "FDECSTP")>; -def: InstRW<[SBWriteResGroup1], (instregex "FFREE")>; -def: InstRW<[SBWriteResGroup1], (instregex "FINCSTP")>; -def: 
InstRW<[SBWriteResGroup1], (instregex "FNOP")>; -def: InstRW<[SBWriteResGroup1], (instregex "INSERTPSrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "JMP64r")>; -def: InstRW<[SBWriteResGroup1], (instregex "MOV64toPQIrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "MOVAPDrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "MOVAPSrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "MOVDDUPrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "MOVDI2PDIrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "MOVHLPSrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "MOVLHPSrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "MOVSDrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "MOVSHDUPrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "MOVSLDUPrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "MOVSSrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "MOVUPDrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "MOVUPSrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "ORPDrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "ORPSrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "RETQ")>; -def: InstRW<[SBWriteResGroup1], (instregex "SHUFPDrri")>; -def: InstRW<[SBWriteResGroup1], (instregex "SHUFPSrri")>; -def: InstRW<[SBWriteResGroup1], (instregex "UNPCKHPDrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "UNPCKHPSrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "UNPCKLPDrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "UNPCKLPSrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VANDNPDYrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VANDNPDrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VANDNPSYrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VANDNPSrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VANDPDrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VANDPDrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VANDPSrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VEXTRACTF128rr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VGATHERQPSZrm")>; -def: 
InstRW<[SBWriteResGroup1], (instregex "VINSERTF128rr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VINSERTPSrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VMOV64toPQIrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VMOV64toPQIrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VMOVAPDYrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VMOVAPDrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VMOVAPSYrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VMOVAPSrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VMOVDDUPYrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VMOVDDUPrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VMOVHLPSrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VMOVHLPSrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VMOVSDrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VMOVSHDUPYrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VMOVSHDUPrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VMOVSLDUPYrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VMOVSLDUPrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VMOVSSrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VMOVUPDYrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VMOVUPDrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VMOVUPSYrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VMOVUPSrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VORPDYrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VORPDrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VORPSYrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VORPSrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VPERMILPDri")>; -def: InstRW<[SBWriteResGroup1], (instregex "VPERMILPDrm")>; -def: InstRW<[SBWriteResGroup1], (instregex "VPERMILPDrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VPERMILPSri")>; -def: InstRW<[SBWriteResGroup1], (instregex "VPERMILPSrm")>; -def: InstRW<[SBWriteResGroup1], (instregex "VPERMILPSrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VPERMILPSrr")>; -def: 
InstRW<[SBWriteResGroup1], (instregex "VSHUFPDYrri")>; -def: InstRW<[SBWriteResGroup1], (instregex "VSHUFPDrri")>; -def: InstRW<[SBWriteResGroup1], (instregex "VSHUFPSYrri")>; -def: InstRW<[SBWriteResGroup1], (instregex "VSHUFPSrri")>; -def: InstRW<[SBWriteResGroup1], (instregex "VUNPCKHPDrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VUNPCKHPSrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VUNPCKLPDYrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VUNPCKLPDrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VUNPCKLPSYrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VUNPCKLPSrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VXORPDrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "VXORPSrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "XORPDrr")>; -def: InstRW<[SBWriteResGroup1], (instregex "XORPSrr")>; - -def SBWriteResGroup2 : SchedWriteRes<[SBPort01]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup2], (instregex "LEA64_32r")>; - -def SBWriteResGroup3 : SchedWriteRes<[SBPort0]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup3], (instregex "BLENDPDrri")>; -def: InstRW<[SBWriteResGroup3], (instregex "BLENDPSrri")>; -def: InstRW<[SBWriteResGroup3], (instregex "BT32ri8")>; -def: InstRW<[SBWriteResGroup3], (instregex "BT32rr")>; -def: InstRW<[SBWriteResGroup3], (instregex "BTC32ri8")>; -def: InstRW<[SBWriteResGroup3], (instregex "BTC32rr")>; -def: InstRW<[SBWriteResGroup3], (instregex "BTR32ri8")>; -def: InstRW<[SBWriteResGroup3], (instregex "BTR32rr")>; -def: InstRW<[SBWriteResGroup3], (instregex "BTS32ri8")>; -def: InstRW<[SBWriteResGroup3], (instregex "BTS32rr")>; -def: InstRW<[SBWriteResGroup3], (instregex "CDQ")>; -def: InstRW<[SBWriteResGroup3], (instregex "CQO")>; -def: InstRW<[SBWriteResGroup3], (instregex "LAHF")>; -def: InstRW<[SBWriteResGroup3], (instregex "SAHF")>; -def: InstRW<[SBWriteResGroup3], (instregex "SAR32ri")>; 
-def: InstRW<[SBWriteResGroup3], (instregex "SAR8ri")>; -def: InstRW<[SBWriteResGroup3], (instregex "SETAEr")>; -def: InstRW<[SBWriteResGroup3], (instregex "SETBr")>; -def: InstRW<[SBWriteResGroup3], (instregex "SETEr")>; -def: InstRW<[SBWriteResGroup3], (instregex "SETGEr")>; -def: InstRW<[SBWriteResGroup3], (instregex "SETGr")>; -def: InstRW<[SBWriteResGroup3], (instregex "SETLEr")>; -def: InstRW<[SBWriteResGroup3], (instregex "SETLr")>; -def: InstRW<[SBWriteResGroup3], (instregex "SETNEr")>; -def: InstRW<[SBWriteResGroup3], (instregex "SETNOr")>; -def: InstRW<[SBWriteResGroup3], (instregex "SETNPr")>; -def: InstRW<[SBWriteResGroup3], (instregex "SETNSr")>; -def: InstRW<[SBWriteResGroup3], (instregex "SETOr")>; -def: InstRW<[SBWriteResGroup3], (instregex "SETPr")>; -def: InstRW<[SBWriteResGroup3], (instregex "SETSr")>; -def: InstRW<[SBWriteResGroup3], (instregex "SHL32ri")>; -def: InstRW<[SBWriteResGroup3], (instregex "SHL64r1")>; -def: InstRW<[SBWriteResGroup3], (instregex "SHL8r1")>; -def: InstRW<[SBWriteResGroup3], (instregex "SHL8ri")>; -def: InstRW<[SBWriteResGroup3], (instregex "SHR32ri")>; -def: InstRW<[SBWriteResGroup3], (instregex "SHR8ri")>; -def: InstRW<[SBWriteResGroup3], (instregex "VBLENDPDYrri")>; -def: InstRW<[SBWriteResGroup3], (instregex "VBLENDPDrri")>; -def: InstRW<[SBWriteResGroup3], (instregex "VBLENDPSYrri")>; -def: InstRW<[SBWriteResGroup3], (instregex "VBLENDPSrri")>; -def: InstRW<[SBWriteResGroup3], (instregex "VMOVDQAYrr")>; -def: InstRW<[SBWriteResGroup3], (instregex "VMOVDQArr")>; -def: InstRW<[SBWriteResGroup3], (instregex "VMOVDQUYrr")>; -def: InstRW<[SBWriteResGroup3], (instregex "VMOVDQUrr")>; - -def SBWriteResGroup4 : SchedWriteRes<[SBPort15]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup4], (instregex "KORTESTBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "MMX_PABSBrr64")>; -def: InstRW<[SBWriteResGroup4], (instregex "MMX_PABSDrr64")>; -def: 
InstRW<[SBWriteResGroup4], (instregex "MMX_PABSWrr64")>; -def: InstRW<[SBWriteResGroup4], (instregex "MMX_PADDQirr")>; -def: InstRW<[SBWriteResGroup4], (instregex "MMX_PALIGNR64irr")>; -def: InstRW<[SBWriteResGroup4], (instregex "MMX_PSHUFBrr64")>; -def: InstRW<[SBWriteResGroup4], (instregex "MMX_PSIGNBrr64")>; -def: InstRW<[SBWriteResGroup4], (instregex "MMX_PSIGNDrr64")>; -def: InstRW<[SBWriteResGroup4], (instregex "MMX_PSIGNWrr64")>; -def: InstRW<[SBWriteResGroup4], (instregex "PABSBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PABSDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PABSWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PACKSSDWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PACKSSWBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PACKUSDWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PACKUSWBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PADDBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PADDDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PADDQrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PADDSBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PADDSWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PADDUSBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PADDUSWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PADDWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PALIGNRrri")>; -def: InstRW<[SBWriteResGroup4], (instregex "PAVGBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PAVGWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PBLENDWrri")>; -def: InstRW<[SBWriteResGroup4], (instregex "PCMPEQBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PCMPEQDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PCMPEQQrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PCMPEQWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PCMPGTBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PCMPGTDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PCMPGTWrr")>; -def: InstRW<[SBWriteResGroup4], 
(instregex "PMAXSBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PMAXSDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PMAXSWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PMAXUBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PMAXUDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PMAXUWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PMINSBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PMINSDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PMINSWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PMINUBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PMINUDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PMINUWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PMOVSXBDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PMOVSXBQrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PMOVSXBWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PMOVSXDQrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PMOVSXWDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PMOVSXWQrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PMOVZXBDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PMOVZXBQrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PMOVZXBWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PMOVZXDQrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PMOVZXWDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PMOVZXWQrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PSHUFBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PSHUFDri")>; -def: InstRW<[SBWriteResGroup4], (instregex "PSHUFHWri")>; -def: InstRW<[SBWriteResGroup4], (instregex "PSHUFLWri")>; -def: InstRW<[SBWriteResGroup4], (instregex "PSIGNBrr128")>; -def: InstRW<[SBWriteResGroup4], (instregex "PSIGNDrr128")>; -def: InstRW<[SBWriteResGroup4], (instregex "PSIGNWrr128")>; -def: InstRW<[SBWriteResGroup4], (instregex "PSLLDQri")>; -def: InstRW<[SBWriteResGroup4], (instregex "PSRLDQri")>; -def: InstRW<[SBWriteResGroup4], (instregex "PSUBBrr")>; -def: 
InstRW<[SBWriteResGroup4], (instregex "PSUBDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PSUBQrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PSUBSBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PSUBSWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PSUBUSBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PSUBUSWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PSUBWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PUNPCKHBWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PUNPCKHDQrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PUNPCKHQDQrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PUNPCKHWDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PUNPCKLBWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PUNPCKLDQrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PUNPCKLQDQrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "PUNPCKLWDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VMASKMOVPSYrm")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPABSBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPABSDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPABSWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPACKSSDWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPACKSSWBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPACKUSDWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPACKUSWBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPADDBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPADDDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPADDQrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPADDUSBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPADDUSWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPALIGNRrri")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPAVGBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPAVGWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPBLENDWrri")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPCMPEQBrr")>; -def: InstRW<[SBWriteResGroup4], 
(instregex "VPCMPEQDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPCMPEQWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPCMPGTBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPCMPGTDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPCMPGTWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPMAXSBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPMAXSDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPMAXSWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPMAXUBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPMAXUDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPMAXUWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPMINSBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPMINSDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPMINSWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPMINUBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPMINUDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPMINUWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPMOVSXBDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPMOVSXBQrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPMOVSXBWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPMOVSXDQrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPMOVSXWDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPMOVSXWQrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPMOVZXBDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPMOVZXBQrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPMOVZXBWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPMOVZXDQrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPMOVZXWDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPMOVZXWQrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPSHUFBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPSHUFDri")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPSHUFLWri")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPSIGNBrr128")>; -def: InstRW<[SBWriteResGroup4], (instregex 
"VPSIGNDrr128")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPSIGNWrr128")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPSLLDQri")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPSRLDQri")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPSUBBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPSUBDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPSUBQrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPSUBSBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPSUBSWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPSUBUSBrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPSUBUSWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPSUBWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPUNPCKHBWrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPUNPCKHDQrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPUNPCKHWDrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPUNPCKLDQrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPUNPCKLQDQrr")>; -def: InstRW<[SBWriteResGroup4], (instregex "VPUNPCKLWDrr")>; - -def SBWriteResGroup5 : SchedWriteRes<[SBPort015]> { - let Latency = 1; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup5], (instregex "ADD32ri8")>; -def: InstRW<[SBWriteResGroup5], (instregex "ADD32rr")>; -def: InstRW<[SBWriteResGroup5], (instregex "ADD8ri")>; -def: InstRW<[SBWriteResGroup5], (instregex "ADD8rr")>; -def: InstRW<[SBWriteResGroup5], (instregex "AND32ri")>; -def: InstRW<[SBWriteResGroup5], (instregex "AND64ri8")>; -def: InstRW<[SBWriteResGroup5], (instregex "AND64rr")>; -def: InstRW<[SBWriteResGroup5], (instregex "AND8ri")>; -def: InstRW<[SBWriteResGroup5], (instregex "AND8rr")>; -def: InstRW<[SBWriteResGroup5], (instregex "CBW")>; -def: InstRW<[SBWriteResGroup5], (instregex "CMC")>; -def: InstRW<[SBWriteResGroup5], (instregex "CMP16ri8")>; -def: InstRW<[SBWriteResGroup5], (instregex "CMP32i32")>; -def: InstRW<[SBWriteResGroup5], (instregex "CMP64rr")>; -def: InstRW<[SBWriteResGroup5], 
(instregex "CMP8ri")>; -def: InstRW<[SBWriteResGroup5], (instregex "CMP8rr")>; -def: InstRW<[SBWriteResGroup5], (instregex "CWDE")>; -def: InstRW<[SBWriteResGroup5], (instregex "DEC64r")>; -def: InstRW<[SBWriteResGroup5], (instregex "DEC8r")>; -def: InstRW<[SBWriteResGroup5], (instregex "INC64r")>; -def: InstRW<[SBWriteResGroup5], (instregex "INC8r")>; -def: InstRW<[SBWriteResGroup5], (instregex "MMX_MOVD64from64rr")>; -def: InstRW<[SBWriteResGroup5], (instregex "MMX_MOVQ2DQrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "MOV32rr")>; -def: InstRW<[SBWriteResGroup5], (instregex "MOV8ri")>; -def: InstRW<[SBWriteResGroup5], (instregex "MOV8rr")>; -def: InstRW<[SBWriteResGroup5], (instregex "MOVDQArr")>; -def: InstRW<[SBWriteResGroup5], (instregex "MOVDQUrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "MOVPQI2QIrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "MOVSX32rr16")>; -def: InstRW<[SBWriteResGroup5], (instregex "MOVSX32rr8")>; -def: InstRW<[SBWriteResGroup5], (instregex "MOVZX32rr16")>; -def: InstRW<[SBWriteResGroup5], (instregex "MOVZX32rr8")>; -def: InstRW<[SBWriteResGroup5], (instregex "NEG64r")>; -def: InstRW<[SBWriteResGroup5], (instregex "NEG8r")>; -def: InstRW<[SBWriteResGroup5], (instregex "NOT64r")>; -def: InstRW<[SBWriteResGroup5], (instregex "NOT8r")>; -def: InstRW<[SBWriteResGroup5], (instregex "OR64ri8")>; -def: InstRW<[SBWriteResGroup5], (instregex "OR64rr")>; -def: InstRW<[SBWriteResGroup5], (instregex "OR8ri")>; -def: InstRW<[SBWriteResGroup5], (instregex "OR8rr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PANDNrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PANDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PORrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "PXORrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "STC")>; -def: InstRW<[SBWriteResGroup5], (instregex "SUB64ri8")>; -def: InstRW<[SBWriteResGroup5], (instregex "SUB64rr")>; -def: InstRW<[SBWriteResGroup5], (instregex "SUB8ri")>; -def: InstRW<[SBWriteResGroup5], 
(instregex "SUB8rr")>; -def: InstRW<[SBWriteResGroup5], (instregex "TEST64rr")>; -def: InstRW<[SBWriteResGroup5], (instregex "TEST8ri")>; -def: InstRW<[SBWriteResGroup5], (instregex "TEST8rr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VMOVPQI2QIrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VMOVZPQILo2PQIrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPANDNrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPANDrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPORrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "VPXORrr")>; -def: InstRW<[SBWriteResGroup5], (instregex "XOR32rr")>; -def: InstRW<[SBWriteResGroup5], (instregex "XOR64ri8")>; -def: InstRW<[SBWriteResGroup5], (instregex "XOR8ri")>; -def: InstRW<[SBWriteResGroup5], (instregex "XOR8rr")>; - -def SBWriteResGroup6 : SchedWriteRes<[SBPort0]> { - let Latency = 2; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup6], (instregex "MOVMSKPDrr")>; -def: InstRW<[SBWriteResGroup6], (instregex "MOVMSKPSrr")>; -def: InstRW<[SBWriteResGroup6], (instregex "MOVPDI2DIrr")>; -def: InstRW<[SBWriteResGroup6], (instregex "MOVPQIto64rr")>; -def: InstRW<[SBWriteResGroup6], (instregex "PMOVMSKBrr")>; -def: InstRW<[SBWriteResGroup6], (instregex "VMOVMSKPDYrr")>; -def: InstRW<[SBWriteResGroup6], (instregex "VMOVMSKPDrr")>; -def: InstRW<[SBWriteResGroup6], (instregex "VMOVMSKPSrr")>; -def: InstRW<[SBWriteResGroup6], (instregex "VMOVPDI2DIrr")>; -def: InstRW<[SBWriteResGroup6], (instregex "VMOVPQIto64rr")>; - -def SBWriteResGroup8 : SchedWriteRes<[SBPort0]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [2]; -} -def: InstRW<[SBWriteResGroup8], (instregex "BLENDVPDrr0")>; -def: InstRW<[SBWriteResGroup8], (instregex "BLENDVPSrr0")>; -def: InstRW<[SBWriteResGroup8], (instregex "ROL32ri")>; -def: InstRW<[SBWriteResGroup8], (instregex "ROL8ri")>; -def: InstRW<[SBWriteResGroup8], (instregex "ROR32ri")>; -def: InstRW<[SBWriteResGroup8], (instregex "ROR8ri")>; -def: 
InstRW<[SBWriteResGroup8], (instregex "SETAr")>; -def: InstRW<[SBWriteResGroup8], (instregex "SETBEr")>; -def: InstRW<[SBWriteResGroup8], (instregex "VBLENDVPDYrr")>; -def: InstRW<[SBWriteResGroup8], (instregex "VBLENDVPDrr")>; -def: InstRW<[SBWriteResGroup8], (instregex "VBLENDVPSYrr")>; -def: InstRW<[SBWriteResGroup8], (instregex "VBLENDVPSrr")>; - -def SBWriteResGroup9 : SchedWriteRes<[SBPort15]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [2]; -} -def: InstRW<[SBWriteResGroup9], (instregex "VPBLENDVBrr")>; - -def SBWriteResGroup10 : SchedWriteRes<[SBPort015]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [2]; -} -def: InstRW<[SBWriteResGroup10], (instregex "SCASB")>; -def: InstRW<[SBWriteResGroup10], (instregex "SCASL")>; -def: InstRW<[SBWriteResGroup10], (instregex "SCASQ")>; -def: InstRW<[SBWriteResGroup10], (instregex "SCASW")>; - -def SBWriteResGroup11 : SchedWriteRes<[SBPort0,SBPort1]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup11], (instregex "COMISDrr")>; -def: InstRW<[SBWriteResGroup11], (instregex "COMISSrr")>; -def: InstRW<[SBWriteResGroup11], (instregex "UCOMISDrr")>; -def: InstRW<[SBWriteResGroup11], (instregex "UCOMISSrr")>; -def: InstRW<[SBWriteResGroup11], (instregex "VCOMISDrr")>; -def: InstRW<[SBWriteResGroup11], (instregex "VCOMISSrr")>; -def: InstRW<[SBWriteResGroup11], (instregex "VUCOMISDrr")>; -def: InstRW<[SBWriteResGroup11], (instregex "VUCOMISSrr")>; - -def SBWriteResGroup12 : SchedWriteRes<[SBPort0,SBPort5]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup12], (instregex "CVTPS2PDrr")>; -def: InstRW<[SBWriteResGroup12], (instregex "PTESTrr")>; -def: InstRW<[SBWriteResGroup12], (instregex "VCVTPS2PDYrr")>; -def: InstRW<[SBWriteResGroup12], (instregex "VCVTPS2PDrr")>; -def: InstRW<[SBWriteResGroup12], (instregex "VPTESTYrr")>; -def: InstRW<[SBWriteResGroup12], (instregex 
"VPTESTrr")>; - -def SBWriteResGroup13 : SchedWriteRes<[SBPort0,SBPort15]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup13], (instregex "PSLLDrr")>; -def: InstRW<[SBWriteResGroup13], (instregex "PSLLQrr")>; -def: InstRW<[SBWriteResGroup13], (instregex "PSLLWrr")>; -def: InstRW<[SBWriteResGroup13], (instregex "PSRADrr")>; -def: InstRW<[SBWriteResGroup13], (instregex "PSRAWrr")>; -def: InstRW<[SBWriteResGroup13], (instregex "PSRLDrr")>; -def: InstRW<[SBWriteResGroup13], (instregex "PSRLQrr")>; -def: InstRW<[SBWriteResGroup13], (instregex "PSRLWrr")>; -def: InstRW<[SBWriteResGroup13], (instregex "VPSRADrr")>; -def: InstRW<[SBWriteResGroup13], (instregex "VPSRAWrr")>; -def: InstRW<[SBWriteResGroup13], (instregex "VPSRLDrr")>; -def: InstRW<[SBWriteResGroup13], (instregex "VPSRLQrr")>; -def: InstRW<[SBWriteResGroup13], (instregex "VPSRLWrr")>; - -def SBWriteResGroup14 : SchedWriteRes<[SBPort1,SBPort0]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup14], (instregex "BSWAP32r")>; - -def SBWriteResGroup15 : SchedWriteRes<[SBPort5,SBPort15]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup15], (instregex "PINSRBrr")>; -def: InstRW<[SBWriteResGroup15], (instregex "PINSRDrr")>; -def: InstRW<[SBWriteResGroup15], (instregex "PINSRQrr")>; -def: InstRW<[SBWriteResGroup15], (instregex "PINSRWrri")>; -def: InstRW<[SBWriteResGroup15], (instregex "VPINSRBrr")>; -def: InstRW<[SBWriteResGroup15], (instregex "VPINSRDrr")>; -def: InstRW<[SBWriteResGroup15], (instregex "VPINSRQrr")>; -def: InstRW<[SBWriteResGroup15], (instregex "VPINSRWrri")>; - -def SBWriteResGroup16 : SchedWriteRes<[SBPort5,SBPort015]> { - let Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup16], (instregex "MMX_MOVDQ2Qrr")>; - -def SBWriteResGroup17 : SchedWriteRes<[SBPort0,SBPort015]> { - let 
Latency = 2; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup17], (instregex "ADC64ri8")>; -def: InstRW<[SBWriteResGroup17], (instregex "ADC64rr")>; -def: InstRW<[SBWriteResGroup17], (instregex "ADC8ri")>; -def: InstRW<[SBWriteResGroup17], (instregex "ADC8rr")>; -def: InstRW<[SBWriteResGroup17], (instregex "CMOVAE32rr")>; -def: InstRW<[SBWriteResGroup17], (instregex "CMOVB32rr")>; -def: InstRW<[SBWriteResGroup17], (instregex "CMOVE32rr")>; -def: InstRW<[SBWriteResGroup17], (instregex "CMOVG32rr")>; -def: InstRW<[SBWriteResGroup17], (instregex "CMOVGE32rr")>; -def: InstRW<[SBWriteResGroup17], (instregex "CMOVL32rr")>; -def: InstRW<[SBWriteResGroup17], (instregex "CMOVLE32rr")>; -def: InstRW<[SBWriteResGroup17], (instregex "CMOVNE32rr")>; -def: InstRW<[SBWriteResGroup17], (instregex "CMOVNO32rr")>; -def: InstRW<[SBWriteResGroup17], (instregex "CMOVNP32rr")>; -def: InstRW<[SBWriteResGroup17], (instregex "CMOVNS32rr")>; -def: InstRW<[SBWriteResGroup17], (instregex "CMOVO32rr")>; -def: InstRW<[SBWriteResGroup17], (instregex "CMOVP32rr")>; -def: InstRW<[SBWriteResGroup17], (instregex "CMOVS32rr")>; -def: InstRW<[SBWriteResGroup17], (instregex "SBB32rr")>; -def: InstRW<[SBWriteResGroup17], (instregex "SBB64ri8")>; -def: InstRW<[SBWriteResGroup17], (instregex "SBB8ri")>; -def: InstRW<[SBWriteResGroup17], (instregex "SBB8rr")>; -def: InstRW<[SBWriteResGroup17], (instregex "SHLD32rri8")>; -def: InstRW<[SBWriteResGroup17], (instregex "SHRD32rri8")>; - -def SBWriteResGroup18 : SchedWriteRes<[SBPort0]> { - let Latency = 3; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup18], (instregex "MMX_PMADDUBSWrr64")>; -def: InstRW<[SBWriteResGroup18], (instregex "MMX_PMULHRSWrr64")>; -def: InstRW<[SBWriteResGroup18], (instregex "MMX_PMULUDQirr")>; -def: InstRW<[SBWriteResGroup18], (instregex "PMADDUBSWrr")>; -def: InstRW<[SBWriteResGroup18], (instregex "PMADDWDrr")>; -def: InstRW<[SBWriteResGroup18], (instregex 
"PMULDQrr")>; -def: InstRW<[SBWriteResGroup18], (instregex "PMULHRSWrr")>; -def: InstRW<[SBWriteResGroup18], (instregex "PMULHUWrr")>; -def: InstRW<[SBWriteResGroup18], (instregex "PMULHWrr")>; -def: InstRW<[SBWriteResGroup18], (instregex "PMULLDrr")>; -def: InstRW<[SBWriteResGroup18], (instregex "PMULLWrr")>; -def: InstRW<[SBWriteResGroup18], (instregex "PMULUDQrr")>; -def: InstRW<[SBWriteResGroup18], (instregex "PSADBWrr")>; -def: InstRW<[SBWriteResGroup18], (instregex "VMOVMSKPSYrr")>; -def: InstRW<[SBWriteResGroup18], (instregex "VPMADDUBSWrr")>; -def: InstRW<[SBWriteResGroup18], (instregex "VPMADDWDrr")>; -def: InstRW<[SBWriteResGroup18], (instregex "VPMULDQrr")>; -def: InstRW<[SBWriteResGroup18], (instregex "VPMULHRSWrr")>; -def: InstRW<[SBWriteResGroup18], (instregex "VPMULHWrr")>; -def: InstRW<[SBWriteResGroup18], (instregex "VPMULLDrr")>; -def: InstRW<[SBWriteResGroup18], (instregex "VPMULLWrr")>; -def: InstRW<[SBWriteResGroup18], (instregex "VPSADBWrr")>; - -def SBWriteResGroup19 : SchedWriteRes<[SBPort1]> { - let Latency = 3; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup19], (instregex "ADDPDrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "ADDPSrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "ADDSDrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "ADDSSrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "ADDSUBPDrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "ADDSUBPSrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "BSF32rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "BSR32rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMPPDrri")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMPPSrri")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMPSDrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CMPSSrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CRC32r32r32")>; -def: InstRW<[SBWriteResGroup19], (instregex "CRC32r32r8")>; -def: InstRW<[SBWriteResGroup19], (instregex "CVTDQ2PSrr")>; 
-def: InstRW<[SBWriteResGroup19], (instregex "CVTPS2DQrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "CVTTPS2DQrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "MAXPDrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "MAXPSrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "MAXSDrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "MAXSSrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "MINPDrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "MINPSrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "MINSDrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "MINSSrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "MMX_CVTPI2PSirr")>; -def: InstRW<[SBWriteResGroup19], (instregex "MMX_CVTPS2PIirr")>; -def: InstRW<[SBWriteResGroup19], (instregex "MMX_CVTTPS2PIirr")>; -def: InstRW<[SBWriteResGroup19], (instregex "MUL8r")>; -def: InstRW<[SBWriteResGroup19], (instregex "POPCNT32rr")>; -def: InstRW<[SBWriteResGroup19], (instregex "ROUNDPDr")>; -def: InstRW<[SBWriteResGroup19], (instregex "ROUNDPSr")>; -def: InstRW<[SBWriteResGroup19], (instregex "ROUNDSDr")>; -def: InstRW<[SBWriteResGroup19], (instregex "ROUNDSSr")>; -def: InstRW<[SBWriteResGroup19], (instregex "SUBPDrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "SUBPSrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "SUBSDrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "SUBSSrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VADDPDYrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VADDPDrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VADDPSYrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VADDPSrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VADDSDrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VADDSSrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VADDSUBPDYrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VADDSUBPDrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VADDSUBPSYrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VADDSUBPSrr")>; -def: 
InstRW<[SBWriteResGroup19], (instregex "VBROADCASTF128")>; -def: InstRW<[SBWriteResGroup19], (instregex "VCMPPDYrri")>; -def: InstRW<[SBWriteResGroup19], (instregex "VCMPPDrri")>; -def: InstRW<[SBWriteResGroup19], (instregex "VCMPPSYrri")>; -def: InstRW<[SBWriteResGroup19], (instregex "VCMPPSrri")>; -def: InstRW<[SBWriteResGroup19], (instregex "VCMPSDrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VCMPSSrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VCVTDQ2PSYrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VCVTDQ2PSrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VCVTPS2DQYrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VCVTPS2DQrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VCVTTPS2DQrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VMAXPDYrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VMAXPDrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VMAXPSYrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VMAXPSrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VMAXSDrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VMAXSSrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VMINPDrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VMINPSrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VMINSDrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VMINSSrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VPBROADCASTMB2QZrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VROUNDPDr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VROUNDPSr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VROUNDSDr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VSUBPDYrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VSUBPDrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VSUBPSYrr")>; -def: InstRW<[SBWriteResGroup19], (instregex "VSUBPSrr")>; - -def SBWriteResGroup20 : SchedWriteRes<[SBPort0,SBPort5]> { - let Latency = 3; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup20], (instregex 
"EXTRACTPSrr")>; -def: InstRW<[SBWriteResGroup20], (instregex "VEXTRACTPSrr")>; - -def SBWriteResGroup21 : SchedWriteRes<[SBPort0,SBPort15]> { - let Latency = 3; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup21], (instregex "PEXTRBrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "PEXTRDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "PEXTRQrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "PEXTRWri")>; -def: InstRW<[SBWriteResGroup21], (instregex "VPEXTRBrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VPEXTRDrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VPEXTRQrr")>; -def: InstRW<[SBWriteResGroup21], (instregex "VPEXTRWri")>; -def: InstRW<[SBWriteResGroup21], (instregex "SHL64rCL")>; -def: InstRW<[SBWriteResGroup21], (instregex "SHL8rCL")>; - -def SBWriteResGroup22 : SchedWriteRes<[SBPort15]> { - let Latency = 3; - let NumMicroOps = 3; - let ResourceCycles = [3]; -} -def: InstRW<[SBWriteResGroup22], (instregex "MMX_PHADDSWrr64")>; -def: InstRW<[SBWriteResGroup22], (instregex "MMX_PHADDWrr64")>; -def: InstRW<[SBWriteResGroup22], (instregex "MMX_PHADDrr64")>; -def: InstRW<[SBWriteResGroup22], (instregex "MMX_PHSUBDrr64")>; -def: InstRW<[SBWriteResGroup22], (instregex "MMX_PHSUBSWrr64")>; -def: InstRW<[SBWriteResGroup22], (instregex "MMX_PHSUBWrr64")>; -def: InstRW<[SBWriteResGroup22], (instregex "PHADDDrr")>; -def: InstRW<[SBWriteResGroup22], (instregex "PHADDSWrr128")>; -def: InstRW<[SBWriteResGroup22], (instregex "PHADDWrr")>; -def: InstRW<[SBWriteResGroup22], (instregex "PHSUBDrr")>; -def: InstRW<[SBWriteResGroup22], (instregex "PHSUBSWrr128")>; -def: InstRW<[SBWriteResGroup22], (instregex "PHSUBWrr")>; -def: InstRW<[SBWriteResGroup22], (instregex "VPHADDDrr")>; -def: InstRW<[SBWriteResGroup22], (instregex "VPHADDSWrr128")>; -def: InstRW<[SBWriteResGroup22], (instregex "VPHADDWrr")>; -def: InstRW<[SBWriteResGroup22], (instregex "VPHSUBDrr")>; -def: InstRW<[SBWriteResGroup22], (instregex "VPHSUBSWrr128")>; 
-def: InstRW<[SBWriteResGroup22], (instregex "VPHSUBWrr")>; - -def SBWriteResGroup23 : SchedWriteRes<[SBPort015]> { - let Latency = 3; - let NumMicroOps = 3; - let ResourceCycles = [3]; -} -def: InstRW<[SBWriteResGroup23], (instregex "LEAVE64")>; -def: InstRW<[SBWriteResGroup23], (instregex "XADD32rr")>; -def: InstRW<[SBWriteResGroup23], (instregex "XADD8rr")>; - -def SBWriteResGroup24 : SchedWriteRes<[SBPort0,SBPort015]> { - let Latency = 3; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[SBWriteResGroup24], (instregex "CMOVA32rr")>; -def: InstRW<[SBWriteResGroup24], (instregex "CMOVBE32rr")>; - -def SBWriteResGroup25 : SchedWriteRes<[SBPort0,SBPort1]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup25], (instregex "MUL64r")>; - -def SBWriteResGroup26 : SchedWriteRes<[SBPort1,SBPort5]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup26], (instregex "CVTDQ2PDrr")>; -def: InstRW<[SBWriteResGroup26], (instregex "CVTPD2DQrr")>; -def: InstRW<[SBWriteResGroup26], (instregex "CVTPD2PSrr")>; -def: InstRW<[SBWriteResGroup26], (instregex "CVTSD2SSrr")>; -def: InstRW<[SBWriteResGroup26], (instregex "CVTSI2SD64rr")>; -def: InstRW<[SBWriteResGroup26], (instregex "CVTSI2SDrr")>; -def: InstRW<[SBWriteResGroup26], (instregex "CVTTPD2DQrr")>; -def: InstRW<[SBWriteResGroup26], (instregex "MMX_CVTPD2PIirr")>; -def: InstRW<[SBWriteResGroup26], (instregex "MMX_CVTPI2PDirr")>; -def: InstRW<[SBWriteResGroup26], (instregex "MMX_CVTTPD2PIirr")>; -def: InstRW<[SBWriteResGroup26], (instregex "VCVTDQ2PDYrr")>; -def: InstRW<[SBWriteResGroup26], (instregex "VCVTDQ2PDrr")>; -def: InstRW<[SBWriteResGroup26], (instregex "VCVTPD2DQYrr")>; -def: InstRW<[SBWriteResGroup26], (instregex "VCVTPD2DQrr")>; -def: InstRW<[SBWriteResGroup26], (instregex "VCVTPD2PSYrr")>; -def: InstRW<[SBWriteResGroup26], (instregex "VCVTPD2PSrr")>; -def: InstRW<[SBWriteResGroup26], 
(instregex "VCVTSI2SD64rr")>; -def: InstRW<[SBWriteResGroup26], (instregex "VCVTSI2SDrr")>; -def: InstRW<[SBWriteResGroup26], (instregex "VCVTTPD2DQYrr")>; -def: InstRW<[SBWriteResGroup26], (instregex "VCVTTPD2DQrr")>; - -def SBWriteResGroup27 : SchedWriteRes<[SBPort1,SBPort015]> { - let Latency = 4; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup27], (instregex "MOV64sr")>; -def: InstRW<[SBWriteResGroup27], (instregex "PAUSE")>; - -def SBWriteResGroup28 : SchedWriteRes<[SBPort0]> { - let Latency = 5; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup28], (instregex "MULPDrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "MULPSrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "MULSDrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "MULSSrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "PCMPGTQrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "PHMINPOSUWrr128")>; -def: InstRW<[SBWriteResGroup28], (instregex "RCPPSr")>; -def: InstRW<[SBWriteResGroup28], (instregex "RCPSSr")>; -def: InstRW<[SBWriteResGroup28], (instregex "RSQRTPSr")>; -def: InstRW<[SBWriteResGroup28], (instregex "RSQRTSSr")>; -def: InstRW<[SBWriteResGroup28], (instregex "VMULPDYrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "VMULPDrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "VMULPSYrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "VMULPSrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "VMULSDrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "VMULSSrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "VPCMPGTQrr")>; -def: InstRW<[SBWriteResGroup28], (instregex "VPHMINPOSUWrr128")>; -def: InstRW<[SBWriteResGroup28], (instregex "VRSQRTPSr")>; -def: InstRW<[SBWriteResGroup28], (instregex "VRSQRTSSr")>; - -def SBWriteResGroup29 : SchedWriteRes<[SBPort23]> { - let Latency = 5; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup29], (instregex "MOV32rm")>; -def: 
InstRW<[SBWriteResGroup29], (instregex "MOV8rm")>; -def: InstRW<[SBWriteResGroup29], (instregex "MOVSX32rm16")>; -def: InstRW<[SBWriteResGroup29], (instregex "MOVSX32rm8")>; -def: InstRW<[SBWriteResGroup29], (instregex "MOVZX32rm16")>; -def: InstRW<[SBWriteResGroup29], (instregex "MOVZX32rm8")>; -def: InstRW<[SBWriteResGroup29], (instregex "PREFETCH")>; - -def SBWriteResGroup30 : SchedWriteRes<[SBPort0,SBPort1]> { - let Latency = 5; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup30], (instregex "CVTSD2SI64rr")>; -def: InstRW<[SBWriteResGroup30], (instregex "CVTSD2SIrr")>; -def: InstRW<[SBWriteResGroup30], (instregex "CVTSS2SI64rr")>; -def: InstRW<[SBWriteResGroup30], (instregex "CVTSS2SIrr")>; -def: InstRW<[SBWriteResGroup30], (instregex "CVTTSD2SI64rr")>; -def: InstRW<[SBWriteResGroup30], (instregex "CVTTSD2SIrr")>; -def: InstRW<[SBWriteResGroup30], (instregex "CVTTSS2SI64rr")>; -def: InstRW<[SBWriteResGroup30], (instregex "CVTTSS2SIrr")>; -def: InstRW<[SBWriteResGroup30], (instregex "VCVTSD2SI64rr")>; -def: InstRW<[SBWriteResGroup30], (instregex "VCVTSS2SI64rr")>; -def: InstRW<[SBWriteResGroup30], (instregex "VCVTSS2SIrr")>; -def: InstRW<[SBWriteResGroup30], (instregex "VCVTTSD2SI64rr")>; -def: InstRW<[SBWriteResGroup30], (instregex "VCVTTSD2SIrr")>; -def: InstRW<[SBWriteResGroup30], (instregex "VCVTTSS2SI64rr")>; -def: InstRW<[SBWriteResGroup30], (instregex "VCVTTSS2SIrr")>; - -def SBWriteResGroup31 : SchedWriteRes<[SBPort4,SBPort23]> { - let Latency = 5; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup31], (instregex "MOV64mr")>; -def: InstRW<[SBWriteResGroup31], (instregex "MOV8mr")>; -def: InstRW<[SBWriteResGroup31], (instregex "MOVAPDmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "MOVAPSmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "MOVDQAmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "MOVDQUmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "MOVHPDmr")>; -def: 
InstRW<[SBWriteResGroup31], (instregex "MOVHPSmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "MOVLPDmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "MOVLPSmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "MOVNTDQmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "MOVNTI_64mr")>; -def: InstRW<[SBWriteResGroup31], (instregex "MOVNTImr")>; -def: InstRW<[SBWriteResGroup31], (instregex "MOVNTPDmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "MOVNTPSmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "MOVPDI2DImr")>; -def: InstRW<[SBWriteResGroup31], (instregex "MOVPQI2QImr")>; -def: InstRW<[SBWriteResGroup31], (instregex "MOVPQIto64mr")>; -def: InstRW<[SBWriteResGroup31], (instregex "MOVSSmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "MOVUPDmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "MOVUPSmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "PUSH64i8")>; -def: InstRW<[SBWriteResGroup31], (instregex "PUSH64r")>; -def: InstRW<[SBWriteResGroup31], (instregex "VEXTRACTF128mr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVAPDYmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVAPDmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVAPSYmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVAPSmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVDQAYmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVDQAmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVDQUYmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVDQUmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVHPDmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVHPSmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVLPDmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVLPSmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVNTDQYmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVNTDQmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVNTPDYmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVNTPDmr")>; -def: 
InstRW<[SBWriteResGroup31], (instregex "VMOVNTPSYmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVNTPSmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVPDI2DImr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVPQI2QImr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVPQIto64mr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVSDmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVSSmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVUPDYmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVUPDmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVUPSYmr")>; -def: InstRW<[SBWriteResGroup31], (instregex "VMOVUPSmr")>; - -def SBWriteResGroup32 : SchedWriteRes<[SBPort0,SBPort15]> { - let Latency = 5; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SBWriteResGroup32], (instregex "MPSADBWrri")>; -def: InstRW<[SBWriteResGroup32], (instregex "VMPSADBWrri")>; - -def SBWriteResGroup33 : SchedWriteRes<[SBPort1,SBPort5]> { - let Latency = 5; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SBWriteResGroup33], (instregex "CLI")>; -def: InstRW<[SBWriteResGroup33], (instregex "CVTSI2SS64rr")>; -def: InstRW<[SBWriteResGroup33], (instregex "CVTSI2SSrr")>; -def: InstRW<[SBWriteResGroup33], (instregex "HADDPDrr")>; -def: InstRW<[SBWriteResGroup33], (instregex "HADDPSrr")>; -def: InstRW<[SBWriteResGroup33], (instregex "HSUBPDrr")>; -def: InstRW<[SBWriteResGroup33], (instregex "HSUBPSrr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VCVTSI2SS64rr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VCVTSI2SSrr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VHADDPDrr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VHADDPSYrr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VHADDPSrr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VHSUBPDYrr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VHSUBPDrr")>; -def: InstRW<[SBWriteResGroup33], (instregex "VHSUBPSYrr")>; -def: 
InstRW<[SBWriteResGroup33], (instregex "VHSUBPSrr")>; - -def SBWriteResGroup34 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> { - let Latency = 5; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup34], (instregex "CALL64r")>; -def: InstRW<[SBWriteResGroup34], (instregex "EXTRACTPSmr")>; -def: InstRW<[SBWriteResGroup34], (instregex "VEXTRACTPSmr")>; - -def SBWriteResGroup35 : SchedWriteRes<[SBPort4,SBPort01,SBPort23]> { - let Latency = 5; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup35], (instregex "VMASKMOVPDYrm")>; -def: InstRW<[SBWriteResGroup35], (instregex "VMASKMOVPDmr")>; -def: InstRW<[SBWriteResGroup35], (instregex "VMASKMOVPSmr")>; - -def SBWriteResGroup36 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> { - let Latency = 5; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup36], (instregex "SETAEm")>; -def: InstRW<[SBWriteResGroup36], (instregex "SETBm")>; -def: InstRW<[SBWriteResGroup36], (instregex "SETEm")>; -def: InstRW<[SBWriteResGroup36], (instregex "SETGEm")>; -def: InstRW<[SBWriteResGroup36], (instregex "SETGm")>; -def: InstRW<[SBWriteResGroup36], (instregex "SETLEm")>; -def: InstRW<[SBWriteResGroup36], (instregex "SETLm")>; -def: InstRW<[SBWriteResGroup36], (instregex "SETNEm")>; -def: InstRW<[SBWriteResGroup36], (instregex "SETNOm")>; -def: InstRW<[SBWriteResGroup36], (instregex "SETNPm")>; -def: InstRW<[SBWriteResGroup36], (instregex "SETNSm")>; -def: InstRW<[SBWriteResGroup36], (instregex "SETOm")>; -def: InstRW<[SBWriteResGroup36], (instregex "SETPm")>; -def: InstRW<[SBWriteResGroup36], (instregex "SETSm")>; - -def SBWriteResGroup37 : SchedWriteRes<[SBPort4,SBPort23,SBPort15]> { - let Latency = 5; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup37], (instregex "PEXTRBmr")>; -def: InstRW<[SBWriteResGroup37], (instregex "VPEXTRBmr")>; -def: InstRW<[SBWriteResGroup37], (instregex "VPEXTRDmr")>; -def: 
InstRW<[SBWriteResGroup37], (instregex "VPEXTRWmr")>; - -def SBWriteResGroup38 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> { - let Latency = 5; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup38], (instregex "MOV8mi")>; -def: InstRW<[SBWriteResGroup38], (instregex "STOSB")>; -def: InstRW<[SBWriteResGroup38], (instregex "STOSL")>; -def: InstRW<[SBWriteResGroup38], (instregex "STOSQ")>; -def: InstRW<[SBWriteResGroup38], (instregex "STOSW")>; - -def SBWriteResGroup39 : SchedWriteRes<[SBPort5,SBPort015]> { - let Latency = 5; - let NumMicroOps = 4; - let ResourceCycles = [1,3]; -} -def: InstRW<[SBWriteResGroup39], (instregex "FNINIT")>; - -def SBWriteResGroup40 : SchedWriteRes<[SBPort0,SBPort015]> { - let Latency = 5; - let NumMicroOps = 4; - let ResourceCycles = [1,3]; -} -def: InstRW<[SBWriteResGroup40], (instregex "CMPXCHG32rr")>; -def: InstRW<[SBWriteResGroup40], (instregex "CMPXCHG8rr")>; - -def SBWriteResGroup41 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> { - let Latency = 5; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[SBWriteResGroup41], (instregex "SETAm")>; -def: InstRW<[SBWriteResGroup41], (instregex "SETBEm")>; - -def SBWriteResGroup42 : SchedWriteRes<[SBPort0,SBPort4,SBPort5,SBPort23]> { - let Latency = 5; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SBWriteResGroup42], (instregex "LDMXCSR")>; -def: InstRW<[SBWriteResGroup42], (instregex "STMXCSR")>; -def: InstRW<[SBWriteResGroup42], (instregex "VLDMXCSR")>; -def: InstRW<[SBWriteResGroup42], (instregex "VSTMXCSR")>; - -def SBWriteResGroup43 : SchedWriteRes<[SBPort0,SBPort4,SBPort23,SBPort15]> { - let Latency = 5; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SBWriteResGroup43], (instregex "PEXTRDmr")>; -def: InstRW<[SBWriteResGroup43], (instregex "PEXTRQmr")>; -def: InstRW<[SBWriteResGroup43], (instregex "VPEXTRQmr")>; -def: InstRW<[SBWriteResGroup43], (instregex "PUSHF16")>; -def: 
InstRW<[SBWriteResGroup43], (instregex "PUSHF64")>; - -def SBWriteResGroup44 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> { - let Latency = 5; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SBWriteResGroup44], (instregex "CLFLUSH")>; - -def SBWriteResGroup45 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> { - let Latency = 5; - let NumMicroOps = 5; - let ResourceCycles = [1,2,1,1]; -} -def: InstRW<[SBWriteResGroup45], (instregex "FXRSTOR")>; - -def SBWriteResGroup46 : SchedWriteRes<[SBPort23]> { - let Latency = 6; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup46], (instregex "LDDQUrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "MMX_MOVD64from64rm")>; -def: InstRW<[SBWriteResGroup46], (instregex "MOV64toPQIrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "MOVAPDrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "MOVAPSrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "MOVDDUPrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "MOVDI2PDIrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "MOVDQArm")>; -def: InstRW<[SBWriteResGroup46], (instregex "MOVDQUrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "MOVNTDQArm")>; -def: InstRW<[SBWriteResGroup46], (instregex "MOVSHDUPrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "MOVSLDUPrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "MOVSSrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "MOVUPDrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "MOVUPSrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "POP64r")>; -def: InstRW<[SBWriteResGroup46], (instregex "VBROADCASTSSrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "VLDDQUYrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "VLDDQUrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "VMOV64toPQIrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "VMOVAPDrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "VMOVAPSrm")>; -def: InstRW<[SBWriteResGroup46], (instregex 
"VMOVDDUPrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "VMOVDI2PDIrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "VMOVDQArm")>; -def: InstRW<[SBWriteResGroup46], (instregex "VMOVDQUrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "VMOVNTDQArm")>; -def: InstRW<[SBWriteResGroup46], (instregex "VMOVQI2PQIrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "VMOVSDrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "VMOVSHDUPrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "VMOVSLDUPrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "VMOVSSrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "VMOVUPDrm")>; -def: InstRW<[SBWriteResGroup46], (instregex "VMOVUPSrm")>; - -def SBWriteResGroup47 : SchedWriteRes<[SBPort5,SBPort23]> { - let Latency = 6; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup47], (instregex "JMP64m")>; -def: InstRW<[SBWriteResGroup47], (instregex "MOV64sm")>; - -def SBWriteResGroup48 : SchedWriteRes<[SBPort23,SBPort0]> { - let Latency = 6; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup48], (instregex "BT64mi8")>; - -def SBWriteResGroup49 : SchedWriteRes<[SBPort23,SBPort15]> { - let Latency = 6; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup49], (instregex "MMX_PABSBrm64")>; -def: InstRW<[SBWriteResGroup49], (instregex "MMX_PABSDrm64")>; -def: InstRW<[SBWriteResGroup49], (instregex "MMX_PABSWrm64")>; -def: InstRW<[SBWriteResGroup49], (instregex "MMX_PALIGNR64irm")>; -def: InstRW<[SBWriteResGroup49], (instregex "MMX_PSHUFBrm64")>; -def: InstRW<[SBWriteResGroup49], (instregex "MMX_PSIGNBrm64")>; -def: InstRW<[SBWriteResGroup49], (instregex "MMX_PSIGNDrm64")>; -def: InstRW<[SBWriteResGroup49], (instregex "MMX_PSIGNWrm64")>; - -def SBWriteResGroup50 : SchedWriteRes<[SBPort23,SBPort015]> { - let Latency = 6; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup50], (instregex "ADD64rm")>; -def: 
InstRW<[SBWriteResGroup50], (instregex "ADD8rm")>; -def: InstRW<[SBWriteResGroup50], (instregex "AND64rm")>; -def: InstRW<[SBWriteResGroup50], (instregex "AND8rm")>; -def: InstRW<[SBWriteResGroup50], (instregex "CMP64mi8")>; -def: InstRW<[SBWriteResGroup50], (instregex "CMP64mr")>; -def: InstRW<[SBWriteResGroup50], (instregex "CMP64rm")>; -def: InstRW<[SBWriteResGroup50], (instregex "CMP8mi")>; -def: InstRW<[SBWriteResGroup50], (instregex "CMP8mr")>; -def: InstRW<[SBWriteResGroup50], (instregex "CMP8rm")>; -def: InstRW<[SBWriteResGroup50], (instregex "LODSL")>; -def: InstRW<[SBWriteResGroup50], (instregex "LODSQ")>; -def: InstRW<[SBWriteResGroup50], (instregex "OR64rm")>; -def: InstRW<[SBWriteResGroup50], (instregex "OR8rm")>; -def: InstRW<[SBWriteResGroup50], (instregex "SUB64rm")>; -def: InstRW<[SBWriteResGroup50], (instregex "SUB8rm")>; -def: InstRW<[SBWriteResGroup50], (instregex "XOR64rm")>; -def: InstRW<[SBWriteResGroup50], (instregex "XOR8rm")>; -def: InstRW<[SBWriteResGroup50], (instregex "POP64rmm")>; -def: InstRW<[SBWriteResGroup50], (instregex "PUSH64rmm")>; - -def SBWriteResGroup51 : SchedWriteRes<[SBPort23]> { - let Latency = 7; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup51], (instregex "VBROADCASTSDYrm")>; -def: InstRW<[SBWriteResGroup51], (instregex "VBROADCASTSSrm")>; -def: InstRW<[SBWriteResGroup51], (instregex "VMOVAPDYrm")>; -def: InstRW<[SBWriteResGroup51], (instregex "VMOVAPSYrm")>; -def: InstRW<[SBWriteResGroup51], (instregex "VMOVDDUPYrm")>; -def: InstRW<[SBWriteResGroup51], (instregex "VMOVDQAYrm")>; -def: InstRW<[SBWriteResGroup51], (instregex "VMOVDQUYrm")>; -def: InstRW<[SBWriteResGroup51], (instregex "VMOVSHDUPYrm")>; -def: InstRW<[SBWriteResGroup51], (instregex "VMOVSLDUPYrm")>; -def: InstRW<[SBWriteResGroup51], (instregex "VMOVUPDYrm")>; -def: InstRW<[SBWriteResGroup51], (instregex "VMOVUPSYrm")>; - -def SBWriteResGroup52 : SchedWriteRes<[SBPort0,SBPort23]> { - let Latency = 7; - let NumMicroOps 
= 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup52], (instregex "CVTPS2PDrm")>; -def: InstRW<[SBWriteResGroup52], (instregex "CVTSS2SDrm")>; -def: InstRW<[SBWriteResGroup52], (instregex "VCVTPS2PDYrm")>; -def: InstRW<[SBWriteResGroup52], (instregex "VCVTPS2PDrm")>; -def: InstRW<[SBWriteResGroup52], (instregex "VCVTSS2SDrm")>; -def: InstRW<[SBWriteResGroup52], (instregex "VTESTPDrm")>; -def: InstRW<[SBWriteResGroup52], (instregex "VTESTPSrm")>; - -def SBWriteResGroup53 : SchedWriteRes<[SBPort5,SBPort23]> { - let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup53], (instregex "ANDNPDrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "ANDNPSrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "ANDPDrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "ANDPSrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "INSERTPSrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "MOVHPDrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "MOVHPSrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "MOVLPDrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "MOVLPSrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "ORPDrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "ORPSrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "SHUFPDrmi")>; -def: InstRW<[SBWriteResGroup53], (instregex "SHUFPSrmi")>; -def: InstRW<[SBWriteResGroup53], (instregex "UNPCKHPDrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "UNPCKHPSrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "UNPCKLPDrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "UNPCKLPSrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "VANDNPDrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "VANDNPSrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "VANDPDrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "VANDPSrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "VBROADCASTF128")>; -def: InstRW<[SBWriteResGroup53], (instregex "VINSERTPSrm")>; -def: 
InstRW<[SBWriteResGroup53], (instregex "VMOVHPDrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "VMOVHPSrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "VMOVLPDrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "VMOVLPSrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "VORPDrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "VORPSrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "VPERMILPDmi")>; -def: InstRW<[SBWriteResGroup53], (instregex "VPERMILPDri")>; -def: InstRW<[SBWriteResGroup53], (instregex "VPERMILPSmi")>; -def: InstRW<[SBWriteResGroup53], (instregex "VPERMILPSri")>; -def: InstRW<[SBWriteResGroup53], (instregex "VSHUFPDrmi")>; -def: InstRW<[SBWriteResGroup53], (instregex "VSHUFPSrmi")>; -def: InstRW<[SBWriteResGroup53], (instregex "VUNPCKHPDrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "VUNPCKHPSrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "VUNPCKLPDrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "VUNPCKLPSrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "VXORPDrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "VXORPSrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "XORPDrm")>; -def: InstRW<[SBWriteResGroup53], (instregex "XORPSrm")>; - -def SBWriteResGroup54 : SchedWriteRes<[SBPort5,SBPort015]> { - let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup54], (instregex "AESDECLASTrr")>; -def: InstRW<[SBWriteResGroup54], (instregex "AESDECrr")>; -def: InstRW<[SBWriteResGroup54], (instregex "AESENCLASTrr")>; -def: InstRW<[SBWriteResGroup54], (instregex "AESENCrr")>; -def: InstRW<[SBWriteResGroup54], (instregex "KANDQrr")>; -def: InstRW<[SBWriteResGroup54], (instregex "VAESDECLASTrr")>; -def: InstRW<[SBWriteResGroup54], (instregex "VAESDECrr")>; -def: InstRW<[SBWriteResGroup54], (instregex "VAESENCrr")>; - -def SBWriteResGroup55 : SchedWriteRes<[SBPort23,SBPort0]> { - let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup55], 
(instregex "BLENDPDrmi")>; -def: InstRW<[SBWriteResGroup55], (instregex "BLENDPSrmi")>; -def: InstRW<[SBWriteResGroup55], (instregex "VBLENDPDrmi")>; -def: InstRW<[SBWriteResGroup55], (instregex "VBLENDPSrmi")>; -def: InstRW<[SBWriteResGroup55], (instregex "VINSERTF128rm")>; - -def SBWriteResGroup56 : SchedWriteRes<[SBPort23,SBPort15]> { - let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup56], (instregex "MMX_PADDQirm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PABSBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PABSDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PABSWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PACKSSDWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PACKSSWBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PACKUSDWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PACKUSWBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PADDBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PADDDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PADDQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PADDSBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PADDSWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PADDUSBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PADDUSWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PADDWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PALIGNRrmi")>; -def: InstRW<[SBWriteResGroup56], (instregex "PAVGBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PAVGWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PBLENDWrmi")>; -def: InstRW<[SBWriteResGroup56], (instregex "PCMPEQBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PCMPEQDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PCMPEQQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PCMPEQWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PCMPGTBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PCMPGTDrm")>; -def: InstRW<[SBWriteResGroup56], 
(instregex "PCMPGTWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PINSRBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PINSRDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PINSRQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PINSRWrmi")>; -def: InstRW<[SBWriteResGroup56], (instregex "PMAXSBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PMAXSDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PMAXSWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PMAXUBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PMAXUDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PMAXUWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PMINSBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PMINSDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PMINSWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PMINUBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PMINUDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PMINUWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PMOVSXBDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PMOVSXBQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PMOVSXBWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PMOVSXDQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PMOVSXWDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PMOVSXWQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PMOVZXBDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PMOVZXBQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PMOVZXBWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PMOVZXDQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PMOVZXWDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PMOVZXWQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PSHUFBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PSHUFDmi")>; -def: InstRW<[SBWriteResGroup56], (instregex "PSHUFHWmi")>; -def: InstRW<[SBWriteResGroup56], (instregex "PSHUFLWmi")>; -def: InstRW<[SBWriteResGroup56], (instregex 
"PSIGNBrm128")>; -def: InstRW<[SBWriteResGroup56], (instregex "PSIGNDrm128")>; -def: InstRW<[SBWriteResGroup56], (instregex "PSIGNWrm128")>; -def: InstRW<[SBWriteResGroup56], (instregex "PSUBBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PSUBDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PSUBQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PSUBSBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PSUBSWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PSUBUSBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PSUBUSWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PSUBWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PUNPCKHBWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PUNPCKHDQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PUNPCKHQDQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PUNPCKHWDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PUNPCKLBWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PUNPCKLDQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PUNPCKLQDQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "PUNPCKLWDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPABSBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPABSDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPABSWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPACKSSDWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPACKSSWBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPACKUSDWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPACKUSWBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPADDBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPADDDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPADDQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPADDSBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPADDSWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPADDUSBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPADDUSWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex 
"VPADDWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPALIGNRrmi")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPAVGBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPAVGWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPBLENDWrmi")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPCMPEQBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPCMPEQDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPCMPEQQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPCMPEQWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPCMPGTBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPCMPGTDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPCMPGTWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPINSRBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPINSRDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPINSRQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPINSRWrmi")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPMAXSBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPMAXSDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPMAXSWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPMAXUBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPMAXUDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPMAXUWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPMINSBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPMINSDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPMINSWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPMINUBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPMINUDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPMINUWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPMOVSXBDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPMOVSXBQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPMOVSXBWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPMOVSXDQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPMOVSXWDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex 
"VPMOVSXWQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPMOVZXBDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPMOVZXBQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPMOVZXBWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPMOVZXDQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPMOVZXWDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPMOVZXWQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPSHUFBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPSHUFDmi")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPSHUFHWmi")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPSHUFLWmi")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPSIGNBrm128")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPSIGNDrm128")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPSIGNWrm128")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPSUBBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPSUBDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPSUBQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPSUBSBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPSUBSWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPSUBUSBrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPSUBUSWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPSUBWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPUNPCKHBWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPUNPCKHDQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPUNPCKHQDQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPUNPCKHWDrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPUNPCKLBWrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPUNPCKLDQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPUNPCKLQDQrm")>; -def: InstRW<[SBWriteResGroup56], (instregex "VPUNPCKLWDrm")>; - -def SBWriteResGroup57 : SchedWriteRes<[SBPort23,SBPort015]> { - let Latency = 7; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup57], (instregex 
"PANDNrm")>; -def: InstRW<[SBWriteResGroup57], (instregex "PANDrm")>; -def: InstRW<[SBWriteResGroup57], (instregex "PORrm")>; -def: InstRW<[SBWriteResGroup57], (instregex "PXORrm")>; -def: InstRW<[SBWriteResGroup57], (instregex "VPANDNrm")>; -def: InstRW<[SBWriteResGroup57], (instregex "VPANDrm")>; -def: InstRW<[SBWriteResGroup57], (instregex "VPORrm")>; -def: InstRW<[SBWriteResGroup57], (instregex "VPXORrm")>; - -def SBWriteResGroup58 : SchedWriteRes<[SBPort0,SBPort0]> { - let Latency = 7; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[SBWriteResGroup58], (instregex "VRCPPSr")>; -def: InstRW<[SBWriteResGroup58], (instregex "VRSQRTPSYr")>; - -def SBWriteResGroup59 : SchedWriteRes<[SBPort5,SBPort23]> { - let Latency = 7; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[SBWriteResGroup59], (instregex "VERRm")>; -def: InstRW<[SBWriteResGroup59], (instregex "VERWm")>; - -def SBWriteResGroup60 : SchedWriteRes<[SBPort23,SBPort015]> { - let Latency = 7; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SBWriteResGroup60], (instregex "LODSB")>; -def: InstRW<[SBWriteResGroup60], (instregex "LODSW")>; - -def SBWriteResGroup61 : SchedWriteRes<[SBPort5,SBPort01,SBPort23]> { - let Latency = 7; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup61], (instregex "FARJMP64")>; - -def SBWriteResGroup62 : SchedWriteRes<[SBPort23,SBPort0,SBPort015]> { - let Latency = 7; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup62], (instregex "ADC64rm")>; -def: InstRW<[SBWriteResGroup62], (instregex "ADC8rm")>; -def: InstRW<[SBWriteResGroup62], (instregex "CMOVAE64rm")>; -def: InstRW<[SBWriteResGroup62], (instregex "CMOVB64rm")>; -def: InstRW<[SBWriteResGroup62], (instregex "CMOVE64rm")>; -def: InstRW<[SBWriteResGroup62], (instregex "CMOVG64rm")>; -def: InstRW<[SBWriteResGroup62], (instregex "CMOVGE64rm")>; -def: InstRW<[SBWriteResGroup62], (instregex 
"CMOVL64rm")>; -def: InstRW<[SBWriteResGroup62], (instregex "CMOVLE64rm")>; -def: InstRW<[SBWriteResGroup62], (instregex "CMOVNE64rm")>; -def: InstRW<[SBWriteResGroup62], (instregex "CMOVNO64rm")>; -def: InstRW<[SBWriteResGroup62], (instregex "CMOVNP64rm")>; -def: InstRW<[SBWriteResGroup62], (instregex "CMOVNS64rm")>; -def: InstRW<[SBWriteResGroup62], (instregex "CMOVO64rm")>; -def: InstRW<[SBWriteResGroup62], (instregex "CMOVP64rm")>; -def: InstRW<[SBWriteResGroup62], (instregex "CMOVS64rm")>; -def: InstRW<[SBWriteResGroup62], (instregex "SBB64rm")>; -def: InstRW<[SBWriteResGroup62], (instregex "SBB8rm")>; - -def SBWriteResGroup63 : SchedWriteRes<[SBPort0,SBPort4,SBPort23]> { - let Latency = 7; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[SBWriteResGroup63], (instregex "FNSTSWm")>; - -def SBWriteResGroup64 : SchedWriteRes<[SBPort1,SBPort5,SBPort015]> { - let Latency = 7; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[SBWriteResGroup64], (instregex "SLDT32r")>; -def: InstRW<[SBWriteResGroup64], (instregex "STR32r")>; - -def SBWriteResGroup65 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> { - let Latency = 7; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[SBWriteResGroup65], (instregex "CALL64m")>; -def: InstRW<[SBWriteResGroup65], (instregex "FNSTCW16m")>; - -def SBWriteResGroup66 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> { - let Latency = 7; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[SBWriteResGroup66], (instregex "BTC64mi8")>; -def: InstRW<[SBWriteResGroup66], (instregex "BTR64mi8")>; -def: InstRW<[SBWriteResGroup66], (instregex "BTS64mi8")>; -def: InstRW<[SBWriteResGroup66], (instregex "SAR64mi")>; -def: InstRW<[SBWriteResGroup66], (instregex "SAR8mi")>; -def: InstRW<[SBWriteResGroup66], (instregex "SHL64m1")>; -def: InstRW<[SBWriteResGroup66], (instregex "SHL64mi")>; -def: InstRW<[SBWriteResGroup66], (instregex "SHL8m1")>; -def: InstRW<[SBWriteResGroup66], 
(instregex "SHL8mi")>; -def: InstRW<[SBWriteResGroup66], (instregex "SHR64mi")>; -def: InstRW<[SBWriteResGroup66], (instregex "SHR8mi")>; - -def SBWriteResGroup67 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> { - let Latency = 7; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[SBWriteResGroup67], (instregex "ADD64mi8")>; -def: InstRW<[SBWriteResGroup67], (instregex "ADD64mr")>; -def: InstRW<[SBWriteResGroup67], (instregex "ADD8mi")>; -def: InstRW<[SBWriteResGroup67], (instregex "ADD8mr")>; -def: InstRW<[SBWriteResGroup67], (instregex "AND64mi8")>; -def: InstRW<[SBWriteResGroup67], (instregex "AND64mr")>; -def: InstRW<[SBWriteResGroup67], (instregex "AND8mi")>; -def: InstRW<[SBWriteResGroup67], (instregex "AND8mr")>; -def: InstRW<[SBWriteResGroup67], (instregex "DEC64m")>; -def: InstRW<[SBWriteResGroup67], (instregex "DEC8m")>; -def: InstRW<[SBWriteResGroup67], (instregex "INC64m")>; -def: InstRW<[SBWriteResGroup67], (instregex "INC8m")>; -def: InstRW<[SBWriteResGroup67], (instregex "NEG64m")>; -def: InstRW<[SBWriteResGroup67], (instregex "NEG8m")>; -def: InstRW<[SBWriteResGroup67], (instregex "NOT64m")>; -def: InstRW<[SBWriteResGroup67], (instregex "NOT8m")>; -def: InstRW<[SBWriteResGroup67], (instregex "OR64mi8")>; -def: InstRW<[SBWriteResGroup67], (instregex "OR64mr")>; -def: InstRW<[SBWriteResGroup67], (instregex "OR8mi")>; -def: InstRW<[SBWriteResGroup67], (instregex "OR8mr")>; -def: InstRW<[SBWriteResGroup67], (instregex "SUB64mi8")>; -def: InstRW<[SBWriteResGroup67], (instregex "SUB64mr")>; -def: InstRW<[SBWriteResGroup67], (instregex "SUB8mi")>; -def: InstRW<[SBWriteResGroup67], (instregex "SUB8mr")>; -def: InstRW<[SBWriteResGroup67], (instregex "TEST64rm")>; -def: InstRW<[SBWriteResGroup67], (instregex "TEST8mi")>; -def: InstRW<[SBWriteResGroup67], (instregex "TEST8rm")>; -def: InstRW<[SBWriteResGroup67], (instregex "XOR64mi8")>; -def: InstRW<[SBWriteResGroup67], (instregex "XOR64mr")>; -def: InstRW<[SBWriteResGroup67], (instregex 
"XOR8mi")>; -def: InstRW<[SBWriteResGroup67], (instregex "XOR8mr")>; - -def SBWriteResGroup68 : SchedWriteRes<[SBPort0,SBPort23]> { - let Latency = 8; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup68], (instregex "MMX_PMADDUBSWrm64")>; -def: InstRW<[SBWriteResGroup68], (instregex "MMX_PMULHRSWrm64")>; -def: InstRW<[SBWriteResGroup68], (instregex "VTESTPDYrm")>; -def: InstRW<[SBWriteResGroup68], (instregex "VTESTPSYrm")>; - -def SBWriteResGroup69 : SchedWriteRes<[SBPort1,SBPort23]> { - let Latency = 8; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup69], (instregex "BSF64rm")>; -def: InstRW<[SBWriteResGroup69], (instregex "BSR64rm")>; -def: InstRW<[SBWriteResGroup69], (instregex "CRC32r32m16")>; -def: InstRW<[SBWriteResGroup69], (instregex "CRC32r32m8")>; -def: InstRW<[SBWriteResGroup69], (instregex "MUL8m")>; - -def SBWriteResGroup70 : SchedWriteRes<[SBPort5,SBPort23]> { - let Latency = 8; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup70], (instregex "VANDNPDYrm")>; -def: InstRW<[SBWriteResGroup70], (instregex "VANDNPSYrm")>; -def: InstRW<[SBWriteResGroup70], (instregex "VANDPDrm")>; -def: InstRW<[SBWriteResGroup70], (instregex "VANDPSrm")>; -def: InstRW<[SBWriteResGroup70], (instregex "VORPDYrm")>; -def: InstRW<[SBWriteResGroup70], (instregex "VORPSYrm")>; -def: InstRW<[SBWriteResGroup70], (instregex "VPERM2F128rm")>; -def: InstRW<[SBWriteResGroup70], (instregex "VPERMILPDYri")>; -def: InstRW<[SBWriteResGroup70], (instregex "VPERMILPDmi")>; -def: InstRW<[SBWriteResGroup70], (instregex "VPERMILPSYri")>; -def: InstRW<[SBWriteResGroup70], (instregex "VPERMILPSmi")>; -def: InstRW<[SBWriteResGroup70], (instregex "VSHUFPDYrmi")>; -def: InstRW<[SBWriteResGroup70], (instregex "VSHUFPSYrmi")>; -def: InstRW<[SBWriteResGroup70], (instregex "VUNPCKHPDrm")>; -def: InstRW<[SBWriteResGroup70], (instregex "VUNPCKHPSrm")>; -def: InstRW<[SBWriteResGroup70], (instregex 
"VUNPCKLPDYrm")>; -def: InstRW<[SBWriteResGroup70], (instregex "VUNPCKLPSYrm")>; -def: InstRW<[SBWriteResGroup70], (instregex "VXORPDrm")>; -def: InstRW<[SBWriteResGroup70], (instregex "VXORPSrm")>; - -def SBWriteResGroup71 : SchedWriteRes<[SBPort23,SBPort0]> { - let Latency = 8; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup71], (instregex "VBLENDPDYrmi")>; -def: InstRW<[SBWriteResGroup71], (instregex "VBLENDPSYrmi")>; - -def SBWriteResGroup72 : SchedWriteRes<[SBPort23,SBPort0]> { - let Latency = 8; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SBWriteResGroup72], (instregex "BLENDVPDrm0")>; -def: InstRW<[SBWriteResGroup72], (instregex "BLENDVPSrm0")>; -def: InstRW<[SBWriteResGroup72], (instregex "VBLENDVPDrm")>; -def: InstRW<[SBWriteResGroup72], (instregex "VBLENDVPSrm")>; -def: InstRW<[SBWriteResGroup72], (instregex "VMASKMOVPDrm")>; -def: InstRW<[SBWriteResGroup72], (instregex "VMASKMOVPSrm")>; - -def SBWriteResGroup73 : SchedWriteRes<[SBPort23,SBPort15]> { - let Latency = 8; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SBWriteResGroup73], (instregex "PBLENDVBrr0")>; -def: InstRW<[SBWriteResGroup73], (instregex "VPBLENDVBrm")>; - -def SBWriteResGroup74 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { - let Latency = 8; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup74], (instregex "COMISDrm")>; -def: InstRW<[SBWriteResGroup74], (instregex "COMISSrm")>; -def: InstRW<[SBWriteResGroup74], (instregex "UCOMISDrm")>; -def: InstRW<[SBWriteResGroup74], (instregex "UCOMISSrm")>; -def: InstRW<[SBWriteResGroup74], (instregex "VCOMISDrm")>; -def: InstRW<[SBWriteResGroup74], (instregex "VCOMISSrm")>; -def: InstRW<[SBWriteResGroup74], (instregex "VUCOMISDrm")>; -def: InstRW<[SBWriteResGroup74], (instregex "VUCOMISSrm")>; - -def SBWriteResGroup75 : SchedWriteRes<[SBPort0,SBPort5,SBPort23]> { - let Latency = 8; - let NumMicroOps = 3; - let ResourceCycles 
= [1,1,1]; -} -def: InstRW<[SBWriteResGroup75], (instregex "PTESTrm")>; -def: InstRW<[SBWriteResGroup75], (instregex "VPTESTrm")>; - -def SBWriteResGroup76 : SchedWriteRes<[SBPort0,SBPort23,SBPort15]> { - let Latency = 8; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup76], (instregex "PSLLDrm")>; -def: InstRW<[SBWriteResGroup76], (instregex "PSLLQrm")>; -def: InstRW<[SBWriteResGroup76], (instregex "PSLLWrm")>; -def: InstRW<[SBWriteResGroup76], (instregex "PSRADrm")>; -def: InstRW<[SBWriteResGroup76], (instregex "PSRAWrm")>; -def: InstRW<[SBWriteResGroup76], (instregex "PSRLDrm")>; -def: InstRW<[SBWriteResGroup76], (instregex "PSRLQrm")>; -def: InstRW<[SBWriteResGroup76], (instregex "PSRLWrm")>; -def: InstRW<[SBWriteResGroup76], (instregex "VPSLLDri")>; -def: InstRW<[SBWriteResGroup76], (instregex "VPSLLQri")>; -def: InstRW<[SBWriteResGroup76], (instregex "VPSLLWri")>; -def: InstRW<[SBWriteResGroup76], (instregex "VPSRADrm")>; -def: InstRW<[SBWriteResGroup76], (instregex "VPSRAWrm")>; -def: InstRW<[SBWriteResGroup76], (instregex "VPSRLDrm")>; -def: InstRW<[SBWriteResGroup76], (instregex "VPSRLQrm")>; -def: InstRW<[SBWriteResGroup76], (instregex "VPSRLWrm")>; - -def SBWriteResGroup77 : SchedWriteRes<[SBPort23,SBPort15]> { - let Latency = 8; - let NumMicroOps = 4; - let ResourceCycles = [1,3]; -} -def: InstRW<[SBWriteResGroup77], (instregex "MMX_PHADDSWrm64")>; -def: InstRW<[SBWriteResGroup77], (instregex "MMX_PHADDWrm64")>; -def: InstRW<[SBWriteResGroup77], (instregex "MMX_PHADDrm64")>; -def: InstRW<[SBWriteResGroup77], (instregex "MMX_PHSUBDrm64")>; -def: InstRW<[SBWriteResGroup77], (instregex "MMX_PHSUBSWrm64")>; -def: InstRW<[SBWriteResGroup77], (instregex "MMX_PHSUBWrm64")>; - -def SBWriteResGroup78 : SchedWriteRes<[SBPort23,SBPort015]> { - let Latency = 8; - let NumMicroOps = 4; - let ResourceCycles = [1,3]; -} -def: InstRW<[SBWriteResGroup78], (instregex "CMPXCHG64rm")>; -def: InstRW<[SBWriteResGroup78], (instregex 
"CMPXCHG8rm")>; - -def SBWriteResGroup79 : SchedWriteRes<[SBPort23,SBPort0,SBPort015]> { - let Latency = 8; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[SBWriteResGroup79], (instregex "CMOVA64rm")>; -def: InstRW<[SBWriteResGroup79], (instregex "CMOVBE64rm")>; - -def SBWriteResGroup80 : SchedWriteRes<[SBPort23,SBPort015]> { - let Latency = 8; - let NumMicroOps = 5; - let ResourceCycles = [2,3]; -} -def: InstRW<[SBWriteResGroup80], (instregex "CMPSB")>; -def: InstRW<[SBWriteResGroup80], (instregex "CMPSL")>; -def: InstRW<[SBWriteResGroup80], (instregex "CMPSQ")>; -def: InstRW<[SBWriteResGroup80], (instregex "CMPSW")>; - -def SBWriteResGroup81 : SchedWriteRes<[SBPort4,SBPort5,SBPort23]> { - let Latency = 8; - let NumMicroOps = 5; - let ResourceCycles = [1,2,2]; -} -def: InstRW<[SBWriteResGroup81], (instregex "FLDCW16m")>; - -def SBWriteResGroup82 : SchedWriteRes<[SBPort4,SBPort23,SBPort0]> { - let Latency = 8; - let NumMicroOps = 5; - let ResourceCycles = [1,2,2]; -} -def: InstRW<[SBWriteResGroup82], (instregex "ROL64mi")>; -def: InstRW<[SBWriteResGroup82], (instregex "ROL8mi")>; -def: InstRW<[SBWriteResGroup82], (instregex "ROR64mi")>; -def: InstRW<[SBWriteResGroup82], (instregex "ROR8mi")>; - -def SBWriteResGroup83 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> { - let Latency = 8; - let NumMicroOps = 5; - let ResourceCycles = [1,2,2]; -} -def: InstRW<[SBWriteResGroup83], (instregex "MOVSB")>; -def: InstRW<[SBWriteResGroup83], (instregex "MOVSL")>; -def: InstRW<[SBWriteResGroup83], (instregex "MOVSQ")>; -def: InstRW<[SBWriteResGroup83], (instregex "MOVSW")>; -def: InstRW<[SBWriteResGroup83], (instregex "XADD64rm")>; -def: InstRW<[SBWriteResGroup83], (instregex "XADD8rm")>; - -def SBWriteResGroup84 : SchedWriteRes<[SBPort4,SBPort5,SBPort01,SBPort23]> { - let Latency = 8; - let NumMicroOps = 5; - let ResourceCycles = [1,1,1,2]; -} -def: InstRW<[SBWriteResGroup84], (instregex "FARCALL64")>; - -def SBWriteResGroup85 : 
SchedWriteRes<[SBPort4,SBPort23,SBPort0,SBPort015]> { - let Latency = 8; - let NumMicroOps = 5; - let ResourceCycles = [1,2,1,1]; -} -def: InstRW<[SBWriteResGroup85], (instregex "SHLD64mri8")>; -def: InstRW<[SBWriteResGroup85], (instregex "SHRD64mri8")>; - -def SBWriteResGroup86 : SchedWriteRes<[SBPort0,SBPort23]> { - let Latency = 9; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup86], (instregex "MMX_PMULUDQirm")>; -def: InstRW<[SBWriteResGroup86], (instregex "PMADDUBSWrm")>; -def: InstRW<[SBWriteResGroup86], (instregex "PMADDWDrm")>; -def: InstRW<[SBWriteResGroup86], (instregex "PMULDQrm")>; -def: InstRW<[SBWriteResGroup86], (instregex "PMULHRSWrm")>; -def: InstRW<[SBWriteResGroup86], (instregex "PMULHUWrm")>; -def: InstRW<[SBWriteResGroup86], (instregex "PMULHWrm")>; -def: InstRW<[SBWriteResGroup86], (instregex "PMULLDrm")>; -def: InstRW<[SBWriteResGroup86], (instregex "PMULLWrm")>; -def: InstRW<[SBWriteResGroup86], (instregex "PMULUDQrm")>; -def: InstRW<[SBWriteResGroup86], (instregex "PSADBWrm")>; -def: InstRW<[SBWriteResGroup86], (instregex "VPMADDUBSWrm")>; -def: InstRW<[SBWriteResGroup86], (instregex "VPMADDWDrm")>; -def: InstRW<[SBWriteResGroup86], (instregex "VPMULDQrm")>; -def: InstRW<[SBWriteResGroup86], (instregex "VPMULHRSWrm")>; -def: InstRW<[SBWriteResGroup86], (instregex "VPMULHUWrm")>; -def: InstRW<[SBWriteResGroup86], (instregex "VPMULHWrm")>; -def: InstRW<[SBWriteResGroup86], (instregex "VPMULLDrm")>; -def: InstRW<[SBWriteResGroup86], (instregex "VPMULLWrm")>; -def: InstRW<[SBWriteResGroup86], (instregex "VPMULUDQrm")>; -def: InstRW<[SBWriteResGroup86], (instregex "VPSADBWrm")>; - -def SBWriteResGroup87 : SchedWriteRes<[SBPort1,SBPort23]> { - let Latency = 9; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup87], (instregex "ADDPDrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "ADDPSrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "ADDSDrm")>; -def: 
InstRW<[SBWriteResGroup87], (instregex "ADDSSrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "ADDSUBPDrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "ADDSUBPSrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "CMPPDrmi")>; -def: InstRW<[SBWriteResGroup87], (instregex "CMPPSrmi")>; -def: InstRW<[SBWriteResGroup87], (instregex "CMPSSrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "CVTDQ2PSrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "CVTPS2DQrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "CVTSI2SD64rm")>; -def: InstRW<[SBWriteResGroup87], (instregex "CVTSI2SDrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "CVTTPS2DQrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "MAXPDrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "MAXPSrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "MAXSDrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "MAXSSrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "MINPDrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "MINPSrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "MINSDrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "MINSSrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "MMX_CVTPI2PSirm")>; -def: InstRW<[SBWriteResGroup87], (instregex "MMX_CVTPS2PIirm")>; -def: InstRW<[SBWriteResGroup87], (instregex "MMX_CVTTPS2PIirm")>; -def: InstRW<[SBWriteResGroup87], (instregex "POPCNT64rm")>; -def: InstRW<[SBWriteResGroup87], (instregex "ROUNDPDm")>; -def: InstRW<[SBWriteResGroup87], (instregex "ROUNDPSm")>; -def: InstRW<[SBWriteResGroup87], (instregex "ROUNDSDm")>; -def: InstRW<[SBWriteResGroup87], (instregex "ROUNDSSm")>; -def: InstRW<[SBWriteResGroup87], (instregex "SUBPDrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "SUBPSrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "SUBSDrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "SUBSSrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VADDPDrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VADDPSrm")>; -def: 
InstRW<[SBWriteResGroup87], (instregex "VADDSDrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VADDSSrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VADDSUBPDrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VADDSUBPSrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VCMPPDrmi")>; -def: InstRW<[SBWriteResGroup87], (instregex "VCMPPSrmi")>; -def: InstRW<[SBWriteResGroup87], (instregex "VCMPSDrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VCMPSSrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VCVTDQ2PSrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VCVTPS2DQrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VCVTSI2SD64rm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VCVTSI2SDrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VCVTTPS2DQrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VMAXPDrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VMAXPSrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VMAXSDrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VMAXSSrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VMINPDrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VMINPSrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VMINSDrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VMINSSrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VROUNDPDm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VROUNDPSm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VROUNDSDm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VROUNDSSm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VSUBPDrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VSUBPSrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VSUBSDrm")>; -def: InstRW<[SBWriteResGroup87], (instregex "VSUBSSrm")>; - -def SBWriteResGroup88 : SchedWriteRes<[SBPort23,SBPort0]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,2]; -} -def: InstRW<[SBWriteResGroup88], (instregex "VBLENDVPDYrm")>; -def: InstRW<[SBWriteResGroup88], (instregex "VBLENDVPSYrm")>; 
-def: InstRW<[SBWriteResGroup88], (instregex "VMASKMOVPDrm")>; -def: InstRW<[SBWriteResGroup88], (instregex "VMASKMOVPSrm")>; - -def SBWriteResGroup89 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup89], (instregex "DPPDrri")>; -def: InstRW<[SBWriteResGroup89], (instregex "VDPPDrri")>; - -def SBWriteResGroup90 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup90], (instregex "CVTSD2SI64rm")>; -def: InstRW<[SBWriteResGroup90], (instregex "CVTSD2SIrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "CVTSS2SI64rm")>; -def: InstRW<[SBWriteResGroup90], (instregex "CVTSS2SIrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "CVTTSD2SI64rm")>; -def: InstRW<[SBWriteResGroup90], (instregex "CVTTSD2SIrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "CVTTSS2SI64rm")>; -def: InstRW<[SBWriteResGroup90], (instregex "CVTTSS2SIrm")>; -def: InstRW<[SBWriteResGroup90], (instregex "MUL64m")>; - -def SBWriteResGroup91 : SchedWriteRes<[SBPort0,SBPort5,SBPort23]> { - let Latency = 9; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup91], (instregex "VPTESTYrm")>; - -def SBWriteResGroup92 : SchedWriteRes<[SBPort23,SBPort15]> { - let Latency = 9; - let NumMicroOps = 4; - let ResourceCycles = [1,3]; -} -def: InstRW<[SBWriteResGroup92], (instregex "PHADDDrm")>; -def: InstRW<[SBWriteResGroup92], (instregex "PHADDSWrm128")>; -def: InstRW<[SBWriteResGroup92], (instregex "PHADDWrm")>; -def: InstRW<[SBWriteResGroup92], (instregex "PHSUBDrm")>; -def: InstRW<[SBWriteResGroup92], (instregex "PHSUBSWrm128")>; -def: InstRW<[SBWriteResGroup92], (instregex "PHSUBWrm")>; -def: InstRW<[SBWriteResGroup92], (instregex "VPHADDDrm")>; -def: InstRW<[SBWriteResGroup92], (instregex "VPHADDSWrm128")>; -def: InstRW<[SBWriteResGroup92], (instregex "VPHADDWrm")>; -def: 
InstRW<[SBWriteResGroup92], (instregex "VPHSUBDrm")>; -def: InstRW<[SBWriteResGroup92], (instregex "VPHSUBSWrm128")>; -def: InstRW<[SBWriteResGroup92], (instregex "VPHSUBWrm")>; -def: InstRW<[SBWriteResGroup92], (instregex "SHL64mCL")>; -def: InstRW<[SBWriteResGroup92], (instregex "SHL8mCL")>; - -def SBWriteResGroup93 : SchedWriteRes<[SBPort4,SBPort23,SBPort015]> { - let Latency = 9; - let NumMicroOps = 6; - let ResourceCycles = [1,2,3]; -} -def: InstRW<[SBWriteResGroup93], (instregex "ADC64mi8")>; -def: InstRW<[SBWriteResGroup93], (instregex "ADC8mi")>; -def: InstRW<[SBWriteResGroup93], (instregex "SBB64mi8")>; -def: InstRW<[SBWriteResGroup93], (instregex "SBB8mi")>; - -def SBWriteResGroup94 : SchedWriteRes<[SBPort4,SBPort23,SBPort0,SBPort015]> { - let Latency = 9; - let NumMicroOps = 6; - let ResourceCycles = [1,2,2,1]; -} -def: InstRW<[SBWriteResGroup94], (instregex "ADC64mr")>; -def: InstRW<[SBWriteResGroup94], (instregex "ADC8mr")>; -def: InstRW<[SBWriteResGroup94], (instregex "SBB64mr")>; -def: InstRW<[SBWriteResGroup94], (instregex "SBB8mr")>; - -def SBWriteResGroup95 : SchedWriteRes<[SBPort4,SBPort5,SBPort23,SBPort0,SBPort015]> { - let Latency = 9; - let NumMicroOps = 6; - let ResourceCycles = [1,1,2,1,1]; -} -def: InstRW<[SBWriteResGroup95], (instregex "BT64mr")>; -def: InstRW<[SBWriteResGroup95], (instregex "BTC64mr")>; -def: InstRW<[SBWriteResGroup95], (instregex "BTR64mr")>; -def: InstRW<[SBWriteResGroup95], (instregex "BTS64mr")>; -def: InstRW<[SBWriteResGroup95], (instregex "VADDPDYrm")>; -def: InstRW<[SBWriteResGroup95], (instregex "VADDPSYrm")>; -def: InstRW<[SBWriteResGroup95], (instregex "VADDSUBPDYrm")>; -def: InstRW<[SBWriteResGroup95], (instregex "VADDSUBPSYrm")>; -def: InstRW<[SBWriteResGroup95], (instregex "VCMPPDYrmi")>; -def: InstRW<[SBWriteResGroup95], (instregex "VCMPPSYrmi")>; -def: InstRW<[SBWriteResGroup95], (instregex "VCVTDQ2PSYrm")>; -def: InstRW<[SBWriteResGroup95], (instregex "VCVTPS2DQYrm")>; -def: InstRW<[SBWriteResGroup95], 
(instregex "VCVTTPS2DQrm")>; -def: InstRW<[SBWriteResGroup95], (instregex "VMAXPDYrm")>; -def: InstRW<[SBWriteResGroup95], (instregex "VMAXPSYrm")>; -def: InstRW<[SBWriteResGroup95], (instregex "VMINPDrm")>; -def: InstRW<[SBWriteResGroup95], (instregex "VMINPSrm")>; -def: InstRW<[SBWriteResGroup95], (instregex "VROUNDPDm")>; -def: InstRW<[SBWriteResGroup95], (instregex "VROUNDPSm")>; -def: InstRW<[SBWriteResGroup95], (instregex "VSUBPDYrm")>; -def: InstRW<[SBWriteResGroup95], (instregex "VSUBPSYrm")>; - -def SBWriteResGroup96 : SchedWriteRes<[SBPort0,SBPort1,SBPort23]> { - let Latency = 10; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup96], (instregex "VCVTSD2SI64rm")>; -def: InstRW<[SBWriteResGroup96], (instregex "VCVTSD2SI64rr")>; -def: InstRW<[SBWriteResGroup96], (instregex "VCVTSS2SI64rm")>; -def: InstRW<[SBWriteResGroup96], (instregex "VCVTSS2SIrm")>; -def: InstRW<[SBWriteResGroup96], (instregex "VCVTTSD2SI64rm")>; -def: InstRW<[SBWriteResGroup96], (instregex "VCVTTSD2SI64rr")>; -def: InstRW<[SBWriteResGroup96], (instregex "VCVTTSS2SI64rm")>; -def: InstRW<[SBWriteResGroup96], (instregex "VCVTTSS2SIrm")>; - -def SBWriteResGroup97 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> { - let Latency = 10; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup97], (instregex "CVTDQ2PDrm")>; -def: InstRW<[SBWriteResGroup97], (instregex "CVTPD2DQrm")>; -def: InstRW<[SBWriteResGroup97], (instregex "CVTPD2PSrm")>; -def: InstRW<[SBWriteResGroup97], (instregex "CVTSD2SSrm")>; -def: InstRW<[SBWriteResGroup97], (instregex "CVTSI2SS64rm")>; -def: InstRW<[SBWriteResGroup97], (instregex "CVTSI2SSrm")>; -def: InstRW<[SBWriteResGroup97], (instregex "CVTTPD2DQrm")>; -def: InstRW<[SBWriteResGroup97], (instregex "MMX_CVTPD2PIirm")>; -def: InstRW<[SBWriteResGroup97], (instregex "MMX_CVTPI2PDirm")>; -def: InstRW<[SBWriteResGroup97], (instregex "MMX_CVTTPD2PIirm")>; -def: InstRW<[SBWriteResGroup97], (instregex 
"VCVTDQ2PDYrm")>; -def: InstRW<[SBWriteResGroup97], (instregex "VCVTDQ2PDrm")>; -def: InstRW<[SBWriteResGroup97], (instregex "VCVTPD2DQrm")>; -def: InstRW<[SBWriteResGroup97], (instregex "VCVTPD2PSrm")>; -def: InstRW<[SBWriteResGroup97], (instregex "VCVTSD2SSrm")>; -def: InstRW<[SBWriteResGroup97], (instregex "VCVTSI2SS64rm")>; -def: InstRW<[SBWriteResGroup97], (instregex "VCVTSI2SSrm")>; -def: InstRW<[SBWriteResGroup97], (instregex "VCVTTPD2DQrm")>; - -def SBWriteResGroup98 : SchedWriteRes<[SBPort0,SBPort23]> { - let Latency = 11; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup98], (instregex "MULPDrm")>; -def: InstRW<[SBWriteResGroup98], (instregex "MULPSrm")>; -def: InstRW<[SBWriteResGroup98], (instregex "MULSDrm")>; -def: InstRW<[SBWriteResGroup98], (instregex "MULSSrm")>; -def: InstRW<[SBWriteResGroup98], (instregex "PCMPGTQrm")>; -def: InstRW<[SBWriteResGroup98], (instregex "PHMINPOSUWrm128")>; -def: InstRW<[SBWriteResGroup98], (instregex "RCPPSm")>; -def: InstRW<[SBWriteResGroup98], (instregex "RCPSSm")>; -def: InstRW<[SBWriteResGroup98], (instregex "RSQRTPSm")>; -def: InstRW<[SBWriteResGroup98], (instregex "RSQRTSSm")>; -def: InstRW<[SBWriteResGroup98], (instregex "VMULPDrm")>; -def: InstRW<[SBWriteResGroup98], (instregex "VMULPSrm")>; -def: InstRW<[SBWriteResGroup98], (instregex "VMULSDrm")>; -def: InstRW<[SBWriteResGroup98], (instregex "VMULSSrm")>; -def: InstRW<[SBWriteResGroup98], (instregex "VPCMPGTQrm")>; -def: InstRW<[SBWriteResGroup98], (instregex "VPHMINPOSUWrm128")>; -def: InstRW<[SBWriteResGroup98], (instregex "VRCPPSm")>; -def: InstRW<[SBWriteResGroup98], (instregex "VRCPSSm")>; -def: InstRW<[SBWriteResGroup98], (instregex "VRSQRTPSm")>; -def: InstRW<[SBWriteResGroup98], (instregex "VRSQRTSSm")>; - -def SBWriteResGroup99 : SchedWriteRes<[SBPort0]> { - let Latency = 11; - let NumMicroOps = 3; - let ResourceCycles = [3]; -} -def: InstRW<[SBWriteResGroup99], (instregex "PCMPISTRIrr")>; -def: 
InstRW<[SBWriteResGroup99], (instregex "PCMPISTRM128rr")>; -def: InstRW<[SBWriteResGroup99], (instregex "VPCMPISTRIrr")>; -def: InstRW<[SBWriteResGroup99], (instregex "VPCMPISTRM128rr")>; - -def SBWriteResGroup100 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> { - let Latency = 11; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup100], (instregex "VCVTPD2DQYrm")>; -def: InstRW<[SBWriteResGroup100], (instregex "VCVTPD2PSYrm")>; -def: InstRW<[SBWriteResGroup100], (instregex "VCVTTPD2DQYrm")>; - -def SBWriteResGroup101 : SchedWriteRes<[SBPort0,SBPort23,SBPort15]> { - let Latency = 11; - let NumMicroOps = 4; - let ResourceCycles = [1,1,2]; -} -def: InstRW<[SBWriteResGroup101], (instregex "MPSADBWrmi")>; -def: InstRW<[SBWriteResGroup101], (instregex "VMPSADBWrmi")>; - -def SBWriteResGroup102 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> { - let Latency = 11; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[SBWriteResGroup102], (instregex "HADDPDrm")>; -def: InstRW<[SBWriteResGroup102], (instregex "HADDPSrm")>; -def: InstRW<[SBWriteResGroup102], (instregex "HSUBPDrm")>; -def: InstRW<[SBWriteResGroup102], (instregex "HSUBPSrm")>; -def: InstRW<[SBWriteResGroup102], (instregex "VHADDPDrm")>; -def: InstRW<[SBWriteResGroup102], (instregex "VHADDPSrm")>; -def: InstRW<[SBWriteResGroup102], (instregex "VHSUBPDrm")>; -def: InstRW<[SBWriteResGroup102], (instregex "VHSUBPSrm")>; - -def SBWriteResGroup103 : SchedWriteRes<[SBPort5]> { - let Latency = 12; - let NumMicroOps = 2; - let ResourceCycles = [2]; -} -def: InstRW<[SBWriteResGroup103], (instregex "AESIMCrr")>; -def: InstRW<[SBWriteResGroup103], (instregex "VAESIMCrr")>; -def: InstRW<[SBWriteResGroup103], (instregex "VMULPDYrm")>; -def: InstRW<[SBWriteResGroup103], (instregex "VMULPSYrm")>; - -def SBWriteResGroup104 : SchedWriteRes<[SBPort0,SBPort1,SBPort5]> { - let Latency = 12; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[SBWriteResGroup104], 
(instregex "DPPSrri")>; -def: InstRW<[SBWriteResGroup104], (instregex "VDPPSYrri")>; -def: InstRW<[SBWriteResGroup104], (instregex "VDPPSrri")>; - -def SBWriteResGroup105 : SchedWriteRes<[SBPort1,SBPort5,SBPort23]> { - let Latency = 12; - let NumMicroOps = 4; - let ResourceCycles = [1,2,1]; -} -def: InstRW<[SBWriteResGroup105], (instregex "VHADDPDrm")>; -def: InstRW<[SBWriteResGroup105], (instregex "VHADDPSYrm")>; -def: InstRW<[SBWriteResGroup105], (instregex "VHSUBPDYrm")>; -def: InstRW<[SBWriteResGroup105], (instregex "VHSUBPSYrm")>; - -def SBWriteResGroup106 : SchedWriteRes<[SBPort5,SBPort23,SBPort015]> { - let Latency = 13; - let NumMicroOps = 3; - let ResourceCycles = [1,1,1]; -} -def: InstRW<[SBWriteResGroup106], (instregex "AESDECLASTrm")>; -def: InstRW<[SBWriteResGroup106], (instregex "AESDECrm")>; -def: InstRW<[SBWriteResGroup106], (instregex "AESENCLASTrm")>; -def: InstRW<[SBWriteResGroup106], (instregex "AESENCrm")>; -def: InstRW<[SBWriteResGroup106], (instregex "VAESDECLASTrm")>; -def: InstRW<[SBWriteResGroup106], (instregex "VAESDECrm")>; -def: InstRW<[SBWriteResGroup106], (instregex "VAESENCLASTrm")>; -def: InstRW<[SBWriteResGroup106], (instregex "VAESENCrm")>; - -def SBWriteResGroup107 : SchedWriteRes<[SBPort0]> { - let Latency = 14; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup107], (instregex "DIVPSrr")>; -def: InstRW<[SBWriteResGroup107], (instregex "DIVSSrr")>; -def: InstRW<[SBWriteResGroup107], (instregex "SQRTPSr")>; -def: InstRW<[SBWriteResGroup107], (instregex "SQRTSSr")>; -def: InstRW<[SBWriteResGroup107], (instregex "VDIVPSrr")>; -def: InstRW<[SBWriteResGroup107], (instregex "VDIVSSrr")>; -def: InstRW<[SBWriteResGroup107], (instregex "VSQRTPSr")>; - -def SBWriteResGroup108 : SchedWriteRes<[SBPort0,SBPort23]> { - let Latency = 14; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup108], (instregex "VSQRTSSm")>; - -def SBWriteResGroup109 : 
SchedWriteRes<[SBPort0,SBPort23,SBPort0]> { - let Latency = 14; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[SBWriteResGroup109], (instregex "VRCPPSm")>; -def: InstRW<[SBWriteResGroup109], (instregex "VRSQRTPSYm")>; - -def SBWriteResGroup110 : SchedWriteRes<[SBPort0,SBPort1,SBPort5,SBPort23]> { - let Latency = 15; - let NumMicroOps = 4; - let ResourceCycles = [1,1,1,1]; -} -def: InstRW<[SBWriteResGroup110], (instregex "DPPDrmi")>; -def: InstRW<[SBWriteResGroup110], (instregex "VDPPDrmi")>; - -def SBWriteResGroup111 : SchedWriteRes<[SBPort0,SBPort23]> { - let Latency = 17; - let NumMicroOps = 4; - let ResourceCycles = [3,1]; -} -def: InstRW<[SBWriteResGroup111], (instregex "PCMPISTRIrm")>; -def: InstRW<[SBWriteResGroup111], (instregex "PCMPISTRM128rm")>; -def: InstRW<[SBWriteResGroup111], (instregex "VPCMPISTRIrm")>; -def: InstRW<[SBWriteResGroup111], (instregex "VPCMPISTRM128rm")>; - -def SBWriteResGroup112 : SchedWriteRes<[SBPort5,SBPort23]> { - let Latency = 18; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[SBWriteResGroup112], (instregex "AESIMCrm")>; -def: InstRW<[SBWriteResGroup112], (instregex "VAESIMCrm")>; - -def SBWriteResGroup113 : SchedWriteRes<[SBPort0,SBPort23]> { - let Latency = 20; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup113], (instregex "DIVPSrm")>; -def: InstRW<[SBWriteResGroup113], (instregex "DIVSSrm")>; -def: InstRW<[SBWriteResGroup113], (instregex "SQRTPSm")>; -def: InstRW<[SBWriteResGroup113], (instregex "SQRTSSm")>; -def: InstRW<[SBWriteResGroup113], (instregex "VDIVPSrm")>; -def: InstRW<[SBWriteResGroup113], (instregex "VDIVSSrm")>; -def: InstRW<[SBWriteResGroup113], (instregex "VSQRTPSm")>; - -def SBWriteResGroup114 : SchedWriteRes<[SBPort0]> { - let Latency = 21; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup114], (instregex "VSQRTSDr")>; - -def SBWriteResGroup115 : SchedWriteRes<[SBPort0,SBPort23]> { - let 
Latency = 21; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup115], (instregex "VSQRTSDm")>; - -def SBWriteResGroup116 : SchedWriteRes<[SBPort0]> { - let Latency = 22; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup116], (instregex "DIVPDrr")>; -def: InstRW<[SBWriteResGroup116], (instregex "DIVSDrr")>; -def: InstRW<[SBWriteResGroup116], (instregex "SQRTPDr")>; -def: InstRW<[SBWriteResGroup116], (instregex "SQRTSDr")>; -def: InstRW<[SBWriteResGroup116], (instregex "VDIVPDrr")>; -def: InstRW<[SBWriteResGroup116], (instregex "VDIVSDrr")>; -def: InstRW<[SBWriteResGroup116], (instregex "VSQRTPDr")>; - -def SBWriteResGroup117 : SchedWriteRes<[SBPort0,SBPort23]> { - let Latency = 28; - let NumMicroOps = 2; - let ResourceCycles = [1,1]; -} -def: InstRW<[SBWriteResGroup117], (instregex "DIVPDrm")>; -def: InstRW<[SBWriteResGroup117], (instregex "DIVSDrm")>; -def: InstRW<[SBWriteResGroup117], (instregex "SQRTPDm")>; -def: InstRW<[SBWriteResGroup117], (instregex "SQRTSDm")>; -def: InstRW<[SBWriteResGroup117], (instregex "VDIVPDrm")>; -def: InstRW<[SBWriteResGroup117], (instregex "VDIVSDrm")>; -def: InstRW<[SBWriteResGroup117], (instregex "VSQRTPDm")>; - -def SBWriteResGroup118 : SchedWriteRes<[SBPort0,SBPort0]> { - let Latency = 29; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[SBWriteResGroup118], (instregex "VDIVPSYrr")>; -def: InstRW<[SBWriteResGroup118], (instregex "VSQRTPSYr")>; - -def SBWriteResGroup119 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> { - let Latency = 36; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[SBWriteResGroup119], (instregex "VDIVPSYrm")>; -def: InstRW<[SBWriteResGroup119], (instregex "VSQRTPSYm")>; - -def SBWriteResGroup120 : SchedWriteRes<[SBPort0,SBPort0]> { - let Latency = 45; - let NumMicroOps = 3; - let ResourceCycles = [2,1]; -} -def: InstRW<[SBWriteResGroup120], (instregex "VDIVPDYrr")>; -def: InstRW<[SBWriteResGroup120], 
(instregex "VSQRTPDYr")>; - -def SBWriteResGroup121 : SchedWriteRes<[SBPort0,SBPort23,SBPort0]> { - let Latency = 52; - let NumMicroOps = 4; - let ResourceCycles = [2,1,1]; -} -def: InstRW<[SBWriteResGroup121], (instregex "VDIVPDYrm")>; -def: InstRW<[SBWriteResGroup121], (instregex "VSQRTPDYm")>; - -def SBWriteResGroup122 : SchedWriteRes<[SBPort0]> { - let Latency = 114; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup122], (instregex "VSQRTSSr")>; - } // SchedModel diff --git a/test/CodeGen/X86/avx-schedule.ll b/test/CodeGen/X86/avx-schedule.ll index 6fe5f4c4da2..47e95fe31bd 100644 --- a/test/CodeGen/X86/avx-schedule.ll +++ b/test/CodeGen/X86/avx-schedule.ll @@ -10,14 +10,14 @@ define <4 x double> @test_addpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SANDY-LABEL: test_addpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_addpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vaddpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_addpd: ; BTVER2: # BB#0: @@ -40,14 +40,14 @@ define <8 x float> @test_addps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SANDY-LABEL: test_addps: ; SANDY: # BB#0: ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_addps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: 
vaddps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vaddps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_addps: ; BTVER2: # BB#0: @@ -70,14 +70,14 @@ define <4 x double> @test_addsubpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; SANDY-LABEL: test_addsubpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_addsubpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vaddsubpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_addsubpd: ; BTVER2: # BB#0: @@ -101,14 +101,14 @@ define <8 x float> @test_addsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> ; SANDY-LABEL: test_addsubps: ; SANDY: # BB#0: ; SANDY-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_addsubps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vaddsubps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_addsubps: ; BTVER2: # BB#0: @@ -131,17 +131,17 @@ declare <8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float>, <8 x float>) nounwi define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, 
<4 x double> *%a2) { ; SANDY-LABEL: test_andnotpd: ; SANDY: # BB#0: -; SANDY-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SANDY-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SANDY-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_andnotpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vandnpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; HASWELL-NEXT: vandnpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_andnotpd: ; BTVER2: # BB#0: @@ -172,17 +172,17 @@ define <4 x double> @test_andnotpd(<4 x double> %a0, <4 x double> %a1, <4 x doub define <8 x float> @test_andnotps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_andnotps: ; SANDY: # BB#0: -; SANDY-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SANDY-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SANDY-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_andnotps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vandnps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; HASWELL-NEXT: vandnps (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_andnotps: ; BTVER2: # BB#0: @@ -216,14 +216,14 @@ define <4 x double> @test_andpd(<4 x 
double> %a0, <4 x double> %a1, <4 x double> ; SANDY-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SANDY-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_andpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vandpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; HASWELL-NEXT: vandpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_andpd: ; BTVER2: # BB#0: @@ -255,14 +255,14 @@ define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SANDY-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SANDY-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_andps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vandps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; HASWELL-NEXT: vandps (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_andps: ; BTVER2: # BB#0: @@ -291,17 +291,17 @@ define <8 x float> @test_andps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; SANDY-LABEL: test_blendpd: ; SANDY: # BB#0: -; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:1.00] +; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = 
ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.50] ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [8:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_blendpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3] sched: [1:0.33] ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],mem[1,2],ymm0[3] sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_blendpd: ; BTVER2: # BB#0: @@ -326,15 +326,15 @@ define <4 x double> @test_blendpd(<4 x double> %a0, <4 x double> %a1, <4 x doubl define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_blendps: ; SANDY: # BB#0: -; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:1.00] -; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [8:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.50] +; SANDY-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_blendps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3,4,5,6,7] sched: [1:0.33] -; HASWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2],ymm0[3],mem[4,5,6],ymm0[7] sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_blendps: ; BTVER2: # BB#0: 
@@ -356,15 +356,15 @@ define <8 x float> @test_blendps(<8 x float> %a0, <8 x float> %a1, <8 x float> * define <4 x double> @test_blendvpd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, <4 x double> *%a3) { ; SANDY-LABEL: test_blendvpd: ; SANDY: # BB#0: -; SANDY-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; SANDY-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] +; SANDY-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_blendvpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vblendvpd %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; HASWELL-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vblendvpd %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:2.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_blendvpd: ; BTVER2: # BB#0: @@ -387,15 +387,15 @@ declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 define <8 x float> @test_blendvps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, <8 x float> *%a3) { ; SANDY-LABEL: test_blendvps: ; SANDY: # BB#0: -; SANDY-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; SANDY-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [9:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:1.00] +; SANDY-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_blendvps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vblendvps %ymm2, %ymm1, %ymm0, %ymm0 # sched: [2:2.00] -; HASWELL-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [2:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vblendvps %ymm2, (%rdi), %ymm0, %ymm0 # sched: [6:2.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: 
test_blendvps: ; BTVER2: # BB#0: @@ -418,13 +418,13 @@ declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x f define <8 x float> @test_broadcastf128(<4 x float> *%a0) { ; SANDY-LABEL: test_broadcastf128: ; SANDY: # BB#0: -; SANDY-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [5:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_broadcastf128: ; HASWELL: # BB#0: -; HASWELL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [?:5.000000e-01] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1] sched: [4:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_broadcastf128: ; BTVER2: # BB#0: @@ -443,13 +443,13 @@ define <8 x float> @test_broadcastf128(<4 x float> *%a0) { define <4 x double> @test_broadcastsd_ymm(double *%a0) { ; SANDY-LABEL: test_broadcastsd_ymm: ; SANDY: # BB#0: -; SANDY-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_broadcastsd_ymm: ; HASWELL: # BB#0: -; HASWELL-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [?:5.000000e-01] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vbroadcastsd (%rdi), %ymm0 # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_broadcastsd_ymm: ; BTVER2: # BB#0: @@ -469,13 +469,13 @@ define <4 x double> @test_broadcastsd_ymm(double *%a0) { define <4 x float> @test_broadcastss(float *%a0) { ; SANDY-LABEL: test_broadcastss: ; SANDY: # BB#0: -; SANDY-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [4:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_broadcastss: ; HASWELL: # BB#0: -; HASWELL-NEXT: 
vbroadcastss (%rdi), %xmm0 # sched: [?:5.000000e-01] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vbroadcastss (%rdi), %xmm0 # sched: [4:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_broadcastss: ; BTVER2: # BB#0: @@ -496,12 +496,12 @@ define <8 x float> @test_broadcastss_ymm(float *%a0) { ; SANDY-LABEL: test_broadcastss_ymm: ; SANDY: # BB#0: ; SANDY-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_broadcastss_ymm: ; HASWELL: # BB#0: ; HASWELL-NEXT: vbroadcastss (%rdi), %ymm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_broadcastss_ymm: ; BTVER2: # BB#0: @@ -521,17 +521,17 @@ define <8 x float> @test_broadcastss_ymm(float *%a0) { define <4 x double> @test_cmppd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; SANDY-LABEL: test_cmppd: ; SANDY: # BB#0: -; SANDY-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm2 # sched: [9:1.00] -; SANDY-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vorpd %ymm2, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] +; SANDY-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cmppd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; HASWELL-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; HASWELL-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; HASWELL-NEXT: vorpd %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cmppd: ; BTVER2: # BB#0: @@ -559,17 +559,17 @@ define <4 x double> @test_cmppd(<4 x double> %a0, <4 x double> %a1, <4 x double> define <8 x float> @test_cmpps(<8 x 
float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_cmpps: ; SANDY: # BB#0: -; SANDY-NEXT: vcmpeqps (%rdi), %ymm0, %ymm2 # sched: [9:1.00] -; SANDY-NEXT: vcmpeqps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vorps %ymm2, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] +; SANDY-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cmpps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcmpeqps %ymm1, %ymm0, %ymm1 # sched: [3:1.00] -; HASWELL-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] +; HASWELL-NEXT: vcmpeqps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] ; HASWELL-NEXT: vorps %ymm0, %ymm1, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cmpps: ; BTVER2: # BB#0: @@ -598,16 +598,16 @@ define <4 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; SANDY-LABEL: test_cvtdq2pd: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [10:1.00] +; SANDY-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [8:1.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtdq2pd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtdq2pd %xmm0, %ymm0 # sched: [6:1.00] -; HASWELL-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [6:1.00] +; HASWELL-NEXT: vcvtdq2pd (%rdi), %ymm1 # sched: [8:1.00] ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvtdq2pd: ; BTVER2: # BB#0: @@ -632,19 +632,19 @@ define <4 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) { ; 
SANDY-LABEL: test_cvtdq2ps: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50] -; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm1, %ymm1 # sched: [7:1.00] -; SANDY-NEXT: vcvtdq2ps %ymm1, %ymm1 # sched: [3:1.00] +; SANDY-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:1.00] +; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [4:0.50] +; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm1, %ymm1 # sched: [5:1.00] +; SANDY-NEXT: vcvtdq2ps %ymm1, %ymm1 # sched: [4:1.00] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtdq2ps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [3:1.00] +; HASWELL-NEXT: vcvtdq2ps %ymm0, %ymm0 # sched: [4:1.00] +; HASWELL-NEXT: vcvtdq2ps (%rdi), %ymm1 # sched: [8:1.00] ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvtdq2ps: ; BTVER2: # BB#0: @@ -669,17 +669,17 @@ define <8 x float> @test_cvtdq2ps(<8 x i32> %a0, <8 x i32> *%a1) { define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) { ; SANDY-LABEL: test_cvtpd2dq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [11:1.00] +; SANDY-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [7:1.00] ; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtpd2dq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvttpd2dq %ymm0, %xmm0 # sched: [6:1.00] -; HASWELL-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [7:1.00] +; HASWELL-NEXT: vcvttpd2dqy (%rdi), %xmm1 # sched: [10:1.00] ; HASWELL-NEXT: vinsertf128 $1, %xmm1, 
%ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvtpd2dq: ; BTVER2: # BB#0: @@ -704,17 +704,17 @@ define <8 x i32> @test_cvtpd2dq(<4 x double> %a0, <4 x double> *%a1) { define <8 x float> @test_cvtpd2ps(<4 x double> %a0, <4 x double> *%a1) { ; SANDY-LABEL: test_cvtpd2ps: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [11:1.00] +; SANDY-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [7:1.00] ; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtpd2ps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [6:1.00] -; HASWELL-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [7:1.00] +; HASWELL-NEXT: vcvtpd2ps %ymm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: vcvtpd2psy (%rdi), %xmm1 # sched: [9:1.00] ; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvtpd2ps: ; BTVER2: # BB#0: @@ -741,15 +741,15 @@ define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) { ; SANDY: # BB#0: ; SANDY-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [7:1.00] -; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtps2dq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvttps2dq %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: vcvttps2dq (%rdi), %ymm1 # sched: [7:1.00] ; HASWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvtps2dq: ; 
BTVER2: # BB#0: @@ -774,15 +774,15 @@ define <8 x i32> @test_cvtps2dq(<8 x float> %a0, <8 x float> *%a1) { define <4 x double> @test_divpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; SANDY-LABEL: test_divpd: ; SANDY: # BB#0: -; SANDY-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [45:3.00] -; SANDY-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [52:3.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [12:1.00] +; SANDY-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [16:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_divpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [35:2.00] -; HASWELL-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [35:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vdivpd %ymm1, %ymm0, %ymm0 # sched: [27:2.00] +; HASWELL-NEXT: vdivpd (%rdi), %ymm0, %ymm0 # sched: [31:2.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_divpd: ; BTVER2: # BB#0: @@ -804,15 +804,15 @@ define <4 x double> @test_divpd(<4 x double> %a0, <4 x double> %a1, <4 x double> define <8 x float> @test_divps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_divps: ; SANDY: # BB#0: -; SANDY-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [29:3.00] -; SANDY-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [36:3.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [12:1.00] +; SANDY-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [16:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_divps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [21:2.00] -; HASWELL-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [21:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vdivps %ymm1, %ymm0, %ymm0 # sched: [19:2.00] +; HASWELL-NEXT: vdivps (%rdi), %ymm0, %ymm0 # sched: [23:2.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_divps: ; BTVER2: # BB#0: @@ -834,15 +834,15 
@@ define <8 x float> @test_divps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a define <8 x float> @test_dpps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_dpps: ; SANDY: # BB#0: -; SANDY-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [12:2.00] +; SANDY-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_dpps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vdpps $7, %ymm1, %ymm0, %ymm0 # sched: [14:2.00] -; HASWELL-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [14:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vdpps $7, (%rdi), %ymm0, %ymm0 # sched: [18:2.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_dpps: ; BTVER2: # BB#0: @@ -866,16 +866,16 @@ define <4 x float> @test_extractf128(<8 x float> %a0, <8 x float> %a1, <4 x floa ; SANDY-LABEL: test_extractf128: ; SANDY: # BB#0: ; SANDY-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [1:1.00] ; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_extractf128: ; HASWELL: # BB#0: ; HASWELL-NEXT: vextractf128 $1, %ymm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [?:1.000000e+00] -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vextractf128 $1, %ymm1, (%rdi) # sched: [4:1.00] +; HASWELL-NEXT: vzeroupper # sched: [1:0.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_extractf128: ; BTVER2: # BB#0: @@ -900,13 +900,13 @@ define <4 x double> @test_haddpd(<4 x double> %a0, <4 x double> %a1, <4 x double ; SANDY: # BB#0: ; SANDY-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; 
SANDY-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_haddpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vhaddpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] ; HASWELL-NEXT: vhaddpd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_haddpd: ; BTVER2: # BB#0: @@ -929,15 +929,15 @@ declare <4 x double> @llvm.x86.avx.hadd.pd.256(<4 x double>, <4 x double>) nounw define <8 x float> @test_haddps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_haddps: ; SANDY: # BB#0: -; SANDY-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; SANDY-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; SANDY-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_haddps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vhaddps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; HASWELL-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [5:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vhaddps (%rdi), %ymm0, %ymm0 # sched: [9:2.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_haddps: ; BTVER2: # BB#0: @@ -960,15 +960,15 @@ declare <8 x float> @llvm.x86.avx.hadd.ps.256(<8 x float>, <8 x float>) nounwind define <4 x double> @test_hsubpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; SANDY-LABEL: test_hsubpd: ; SANDY: # BB#0: -; SANDY-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; SANDY-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vhsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; SANDY-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_hsubpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: 
vhsubpd %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; HASWELL-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [5:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vhsubpd (%rdi), %ymm0, %ymm0 # sched: [9:2.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_hsubpd: ; BTVER2: # BB#0: @@ -991,15 +991,15 @@ declare <4 x double> @llvm.x86.avx.hsub.pd.256(<4 x double>, <4 x double>) nounw define <8 x float> @test_hsubps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_hsubps: ; SANDY: # BB#0: -; SANDY-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; SANDY-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] +; SANDY-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_hsubps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vhsubps %ymm1, %ymm0, %ymm0 # sched: [5:2.00] -; HASWELL-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [5:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vhsubps (%rdi), %ymm0, %ymm0 # sched: [9:2.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_hsubps: ; BTVER2: # BB#0: @@ -1023,16 +1023,16 @@ define <8 x float> @test_insertf128(<8 x float> %a0, <4 x float> %a1, <4 x float ; SANDY-LABEL: test_insertf128: ; SANDY: # BB#0: ; SANDY-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [1:1.00] -; SANDY-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_insertf128: ; HASWELL: # BB#0: ; HASWELL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 # sched: [3:1.00] -; HASWELL-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; HASWELL-NEXT: vinsertf128 $1, (%rdi), %ymm0, %ymm0 # sched: [3:1.00] ; 
HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_insertf128: ; BTVER2: # BB#0: @@ -1059,13 +1059,13 @@ define <8 x float> @test_insertf128(<8 x float> %a0, <4 x float> %a1, <4 x float define <32 x i8> @test_lddqu(i8* %a0) { ; SANDY-LABEL: test_lddqu: ; SANDY: # BB#0: -; SANDY-NEXT: vlddqu (%rdi), %ymm0 # sched: [6:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vlddqu (%rdi), %ymm0 # sched: [4:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_lddqu: ; HASWELL: # BB#0: -; HASWELL-NEXT: vlddqu (%rdi), %ymm0 # sched: [?:5.000000e-01] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vlddqu (%rdi), %ymm0 # sched: [4:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_lddqu: ; BTVER2: # BB#0: @@ -1084,17 +1084,17 @@ declare <32 x i8> @llvm.x86.avx.ldu.dq.256(i8*) nounwind readonly define <2 x double> @test_maskmovpd(i8* %a0, <2 x i64> %a1, <2 x double> %a2) { ; SANDY-LABEL: test_maskmovpd: ; SANDY: # BB#0: -; SANDY-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [8:2.00] -; SANDY-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [?:0.000000e+00] +; SANDY-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [?:0.000000e+00] ; SANDY-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_maskmovpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [2:2.00] -; HASWELL-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [4:1.00] +; HASWELL-NEXT: vmaskmovpd (%rdi), %xmm0, %xmm2 # sched: [4:2.00] +; HASWELL-NEXT: vmaskmovpd %xmm1, %xmm0, (%rdi) # sched: [13:1.00] ; HASWELL-NEXT: vmovapd %xmm2, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_maskmovpd: ; BTVER2: # BB#0: @@ 
-1119,29 +1119,29 @@ declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>) nounwind define <4 x double> @test_maskmovpd_ymm(i8* %a0, <4 x i64> %a1, <4 x double> %a2) { ; SANDY-LABEL: test_maskmovpd_ymm: ; SANDY: # BB#0: -; SANDY-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [5:1.00] -; SANDY-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) +; SANDY-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [?:0.000000e+00] +; SANDY-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [?:0.000000e+00] ; SANDY-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_maskmovpd_ymm: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [4:1.00] -; HASWELL-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) +; HASWELL-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [4:2.00] +; HASWELL-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [14:1.00] ; HASWELL-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_maskmovpd_ymm: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [?:0.000000e+00] -; BTVER2-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) +; BTVER2-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [?:0.000000e+00] ; BTVER2-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_maskmovpd_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vmaskmovpd (%rdi), %ymm0, %ymm2 # sched: [?:0.000000e+00] -; ZNVER1-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) +; ZNVER1-NEXT: vmaskmovpd %ymm1, %ymm0, (%rdi) # sched: [?:0.000000e+00] ; ZNVER1-NEXT: vmovapd %ymm2, %ymm0 # sched: [1:0.50] ; ZNVER1-NEXT: retq # sched: [4:1.00] %1 = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %a0, <4 x i64> %a1) @@ -1154,17 +1154,17 @@ declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>) nounwi define <4 x float> @test_maskmovps(i8* %a0, <4 x i32> %a1, <4 x 
float> %a2) { ; SANDY-LABEL: test_maskmovps: ; SANDY: # BB#0: -; SANDY-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [8:2.00] -; SANDY-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [5:1.00] +; SANDY-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [?:0.000000e+00] +; SANDY-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [?:0.000000e+00] ; SANDY-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_maskmovps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [2:2.00] -; HASWELL-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [4:1.00] +; HASWELL-NEXT: vmaskmovps (%rdi), %xmm0, %xmm2 # sched: [4:2.00] +; HASWELL-NEXT: vmaskmovps %xmm1, %xmm0, (%rdi) # sched: [13:1.00] ; HASWELL-NEXT: vmovaps %xmm2, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_maskmovps: ; BTVER2: # BB#0: @@ -1189,29 +1189,29 @@ declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>) nounwind define <8 x float> @test_maskmovps_ymm(i8* %a0, <8 x i32> %a1, <8 x float> %a2) { ; SANDY-LABEL: test_maskmovps_ymm: ; SANDY: # BB#0: -; SANDY-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [1:0.50] -; SANDY-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) +; SANDY-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [?:0.000000e+00] +; SANDY-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [?:0.000000e+00] ; SANDY-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_maskmovps_ymm: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [1:0.50] -; HASWELL-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) +; HASWELL-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [4:2.00] +; HASWELL-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [14:1.00] ; HASWELL-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: 
[2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_maskmovps_ymm: ; BTVER2: # BB#0: ; BTVER2-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [?:0.000000e+00] -; BTVER2-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) +; BTVER2-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [?:0.000000e+00] ; BTVER2-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.50] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_maskmovps_ymm: ; ZNVER1: # BB#0: ; ZNVER1-NEXT: vmaskmovps (%rdi), %ymm0, %ymm2 # sched: [?:0.000000e+00] -; ZNVER1-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) +; ZNVER1-NEXT: vmaskmovps %ymm1, %ymm0, (%rdi) # sched: [?:0.000000e+00] ; ZNVER1-NEXT: vmovaps %ymm2, %ymm0 # sched: [1:0.50] ; ZNVER1-NEXT: retq # sched: [4:1.00] %1 = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %a0, <8 x i32> %a1) @@ -1225,14 +1225,14 @@ define <4 x double> @test_maxpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SANDY-LABEL: test_maxpd: ; SANDY: # BB#0: ; SANDY-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_maxpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmaxpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_maxpd: ; BTVER2: # BB#0: @@ -1256,14 +1256,14 @@ define <8 x float> @test_maxps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SANDY-LABEL: test_maxps: ; SANDY: # BB#0: ; SANDY-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; 
HASWELL-LABEL: test_maxps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmaxps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmaxps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_maxps: ; BTVER2: # BB#0: @@ -1288,13 +1288,13 @@ define <4 x double> @test_minpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SANDY: # BB#0: ; SANDY-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_minpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vminpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: vminpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_minpd: ; BTVER2: # BB#0: @@ -1319,13 +1319,13 @@ define <8 x float> @test_minps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SANDY: # BB#0: ; SANDY-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_minps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vminps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] ; HASWELL-NEXT: vminps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_minps: ; BTVER2: # BB#0: @@ -1348,17 +1348,17 @@ declare <8 x float> @llvm.x86.avx.min.ps.256(<8 x float>, <8 x float>) nounwind define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) { ; SANDY-LABEL: test_movapd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovapd (%rdi), %ymm0 # sched: [7:0.50] +; SANDY-NEXT: vmovapd (%rdi), %ymm0 # sched: [4:0.50] ; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; 
SANDY-NEXT: vmovapd %ymm0, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movapd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovapd (%rdi), %ymm0 # sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovapd (%rdi), %ymm0 # sched: [4:0.50] ; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovapd %ymm0, (%rsi) # sched: [?:1.000000e+00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmovapd %ymm0, (%rsi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movapd: ; BTVER2: # BB#0: @@ -1382,17 +1382,17 @@ define <4 x double> @test_movapd(<4 x double> *%a0, <4 x double> *%a1) { define <8 x float> @test_movaps(<8 x float> *%a0, <8 x float> *%a1) { ; SANDY-LABEL: test_movaps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovaps (%rdi), %ymm0 # sched: [7:0.50] +; SANDY-NEXT: vmovaps (%rdi), %ymm0 # sched: [4:0.50] ; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovaps %ymm0, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movaps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovaps (%rdi), %ymm0 # sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovaps (%rdi), %ymm0 # sched: [4:0.50] ; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovaps %ymm0, (%rsi) # sched: [?:1.000000e+00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmovaps %ymm0, (%rsi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movaps: ; BTVER2: # BB#0: @@ -1417,16 +1417,16 @@ define <4 x double> @test_movddup(<4 x double> %a0, <4 x double> *%a1) { ; SANDY-LABEL: test_movddup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] -; SANDY-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: 
[7:0.50] +; SANDY-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [4:0.50] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movddup: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2] sched: [1:1.00] -; HASWELL-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovddup {{.*#+}} ymm1 = mem[0,0,2,2] sched: [4:0.50] ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movddup: ; BTVER2: # BB#0: @@ -1451,15 +1451,15 @@ define <4 x double> @test_movddup(<4 x double> %a0, <4 x double> *%a1) { define i32 @test_movmskpd(<4 x double> %a0) { ; SANDY-LABEL: test_movmskpd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] +; SANDY-NEXT: vmovmskpd %ymm0, %eax # sched: [1:0.33] ; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movmskpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovmskpd %ymm0, %eax # sched: [3:1.00] -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmovmskpd %ymm0, %eax # sched: [2:1.00] +; HASWELL-NEXT: vzeroupper # sched: [1:0.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movmskpd: ; BTVER2: # BB#0: @@ -1479,15 +1479,15 @@ declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>) nounwind readnone define i32 @test_movmskps(<8 x float> %a0) { ; SANDY-LABEL: test_movmskps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovmskps %ymm0, %eax # sched: [3:1.00] +; SANDY-NEXT: vmovmskps %ymm0, %eax # sched: [1:0.33] ; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movmskps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovmskps 
%ymm0, %eax # sched: [5:1.00] -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmovmskps %ymm0, %eax # sched: [2:1.00] +; HASWELL-NEXT: vzeroupper # sched: [1:0.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movmskps: ; BTVER2: # BB#0: @@ -1508,14 +1508,14 @@ define <4 x double> @test_movntpd(<4 x double> %a0, <4 x double> *%a1) { ; SANDY-LABEL: test_movntpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovntpd %ymm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movntpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovntpd %ymm0, (%rdi) # sched: [?:1.000000e+00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmovntpd %ymm0, (%rdi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movntpd: ; BTVER2: # BB#0: @@ -1537,14 +1537,14 @@ define <8 x float> @test_movntps(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-LABEL: test_movntps: ; SANDY: # BB#0: ; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmovntps %ymm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movntps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovntps %ymm0, (%rdi) # sched: [?:1.000000e+00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmovntps %ymm0, (%rdi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movntps: ; BTVER2: # BB#0: @@ -1566,16 +1566,16 @@ define <8 x float> @test_movshdup(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-LABEL: test_movshdup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovshdup {{.*#+}} 
ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] -; SANDY-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [7:0.50] +; SANDY-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [4:0.50] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movshdup: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7] sched: [1:1.00] -; HASWELL-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovshdup {{.*#+}} ymm1 = mem[1,1,3,3,5,5,7,7] sched: [4:0.50] ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movshdup: ; BTVER2: # BB#0: @@ -1601,16 +1601,16 @@ define <8 x float> @test_movsldup(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-LABEL: test_movsldup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] -; SANDY-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [7:0.50] +; SANDY-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [4:0.50] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movsldup: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6] sched: [1:1.00] -; HASWELL-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovsldup {{.*#+}} ymm1 = mem[0,0,2,2,4,4,6,6] sched: [4:0.50] ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movsldup: ; BTVER2: # BB#0: @@ -1635,19 +1635,19 @@ define <8 x float> @test_movsldup(<8 x float> %a0, <8 x float> *%a1) { define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) 
{ ; SANDY-LABEL: test_movupd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [4:0.50] +; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00] -; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00] +; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movupd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovupd (%rdi), %ymm0 # sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovupd (%rdi), %ymm0 # sched: [4:0.50] ; HASWELL-NEXT: vaddpd %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovupd %ymm0, (%rsi) # sched: [?:1.000000e+00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmovupd %ymm0, (%rsi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movupd: ; BTVER2: # BB#0: @@ -1671,19 +1671,19 @@ define <4 x double> @test_movupd(<4 x double> *%a0, <4 x double> *%a1) { define <8 x float> @test_movups(<8 x float> *%a0, <8 x float> *%a1) { ; SANDY-LABEL: test_movups: ; SANDY: # BB#0: -; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [4:0.50] +; SANDY-NEXT: vinsertf128 $1, 16(%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [5:1.00] -; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vextractf128 $1, %ymm0, 16(%rsi) # sched: [1:1.00] +; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: 
[5:1.00] ; ; HASWELL-LABEL: test_movups: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovups (%rdi), %ymm0 # sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovups (%rdi), %ymm0 # sched: [4:0.50] ; HASWELL-NEXT: vaddps %ymm0, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovups %ymm0, (%rsi) # sched: [?:1.000000e+00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmovups %ymm0, (%rsi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movups: ; BTVER2: # BB#0: @@ -1708,14 +1708,14 @@ define <4 x double> @test_mulpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SANDY-LABEL: test_mulpd: ; SANDY: # BB#0: ; SANDY-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [12:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_mulpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmulpd %ymm1, %ymm0, %ymm0 # sched: [5:1.00] +; HASWELL-NEXT: vmulpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_mulpd: ; BTVER2: # BB#0: @@ -1738,14 +1738,14 @@ define <8 x float> @test_mulps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SANDY-LABEL: test_mulps: ; SANDY: # BB#0: ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [12:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_mulps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmulps %ymm1, %ymm0, %ymm0 # 
sched: [5:1.00] +; HASWELL-NEXT: vmulps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_mulps: ; BTVER2: # BB#0: @@ -1767,17 +1767,17 @@ define <8 x float> @test_mulps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a define <4 x double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) { ; SANDY-LABEL: orpd: ; SANDY: # BB#0: -; SANDY-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SANDY-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SANDY-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: orpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; HASWELL-NEXT: vorpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: orpd: ; BTVER2: # BB#0: @@ -1806,17 +1806,17 @@ define <4 x double> @orpd(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a2) define <8 x float> @test_orps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) { ; SANDY-LABEL: test_orps: ; SANDY: # BB#0: -; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [8:1.00] +; SANDY-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] +; SANDY-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_orps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: vorps (%rdi), %ymm0, %ymm0 # sched: [1:1.00] +; HASWELL-NEXT: vorps (%rdi), %ymm0, %ymm0 # 
sched: [5:1.00] ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_orps: ; BTVER2: # BB#0: @@ -1846,16 +1846,16 @@ define <2 x double> @test_permilpd(<2 x double> %a0, <2 x double> *%a1) { ; SANDY-LABEL: test_permilpd: ; SANDY: # BB#0: ; SANDY-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] -; SANDY-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [7:1.00] +; SANDY-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [5:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_permilpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] sched: [1:1.00] -; HASWELL-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [1:1.00] +; HASWELL-NEXT: vpermilpd {{.*#+}} xmm1 = mem[1,0] sched: [5:1.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_permilpd: ; BTVER2: # BB#0: @@ -1880,17 +1880,17 @@ define <2 x double> @test_permilpd(<2 x double> %a0, <2 x double> *%a1) { define <4 x double> @test_permilpd_ymm(<4 x double> %a0, <4 x double> *%a1) { ; SANDY-LABEL: test_permilpd_ymm: ; SANDY: # BB#0: -; SANDY-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [8:1.00] +; SANDY-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] ; SANDY-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [5:1.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_permilpd_ymm: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3] sched: [1:1.00] ; HASWELL-NEXT: vpermilpd {{.*#+}} ymm1 = mem[1,0,2,3] sched: [5:1.00] ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: 
[2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_permilpd_ymm: ; BTVER2: # BB#0: @@ -1916,16 +1916,16 @@ define <4 x float> @test_permilps(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-LABEL: test_permilps: ; SANDY: # BB#0: ; SANDY-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] -; SANDY-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:1.00] +; SANDY-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_permilps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0] sched: [1:1.00] -; HASWELL-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [1:1.00] +; HASWELL-NEXT: vpermilps {{.*#+}} xmm1 = mem[3,2,1,0] sched: [5:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_permilps: ; BTVER2: # BB#0: @@ -1950,17 +1950,17 @@ define <4 x float> @test_permilps(<4 x float> %a0, <4 x float> *%a1) { define <8 x float> @test_permilps_ymm(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-LABEL: test_permilps_ymm: ; SANDY: # BB#0: -; SANDY-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [8:1.00] +; SANDY-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] ; SANDY-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_permilps_ymm: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4] sched: [1:1.00] ; HASWELL-NEXT: vpermilps {{.*#+}} ymm1 = mem[3,2,1,0,7,6,5,4] sched: [5:1.00] ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] 
; ; BTVER2-LABEL: test_permilps_ymm: ; BTVER2: # BB#0: @@ -1986,14 +1986,14 @@ define <2 x double> @test_permilvarpd(<2 x double> %a0, <2 x i64> %a1, <2 x i64> ; SANDY-LABEL: test_permilvarpd: ; SANDY: # BB#0: ; SANDY-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_permilvarpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpermilpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpermilpd (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_permilvarpd: ; BTVER2: # BB#0: @@ -2018,13 +2018,13 @@ define <4 x double> @test_permilvarpd_ymm(<4 x double> %a0, <4 x i64> %a1, <4 x ; SANDY: # BB#0: ; SANDY-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; SANDY-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_permilvarpd_ymm: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpermilpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: vpermilpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_permilvarpd_ymm: ; BTVER2: # BB#0: @@ -2048,14 +2048,14 @@ define <4 x float> @test_permilvarps(<4 x float> %a0, <4 x i32> %a1, <4 x i32> * ; SANDY-LABEL: test_permilvarps: ; SANDY: # BB#0: ; SANDY-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_permilvarps: ; HASWELL: # BB#0: ; 
HASWELL-NEXT: vpermilps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpermilps (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_permilvarps: ; BTVER2: # BB#0: @@ -2080,13 +2080,13 @@ define <8 x float> @test_permilvarps_ymm(<8 x float> %a0, <8 x i32> %a1, <8 x i3 ; SANDY: # BB#0: ; SANDY-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; SANDY-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_permilvarps_ymm: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpermilps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] ; HASWELL-NEXT: vpermilps (%rdi), %ymm0, %ymm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_permilvarps_ymm: ; BTVER2: # BB#0: @@ -2112,14 +2112,14 @@ define <8 x float> @test_rcpps(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vrcpps (%rdi), %ymm1 # sched: [9:1.00] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_rcpps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpps (%rdi), %ymm1 # sched: [11:2.00] -; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] +; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_rcpps: ; BTVER2: # BB#0: @@ -2148,14 +2148,14 @@ define <4 x double> @test_roundpd(<4 x double> %a0, <4 x double> *%a1) { ; SANDY-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [7:1.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; 
SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_roundpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [7:1.00] +; HASWELL-NEXT: vroundpd $7, %ymm0, %ymm0 # sched: [6:2.00] +; HASWELL-NEXT: vroundpd $7, (%rdi), %ymm1 # sched: [10:2.00] ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_roundpd: ; BTVER2: # BB#0: @@ -2184,14 +2184,14 @@ define <8 x float> @test_roundps(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [7:1.00] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_roundps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [7:1.00] +; HASWELL-NEXT: vroundps $7, %ymm0, %ymm0 # sched: [6:2.00] +; HASWELL-NEXT: vroundps $7, (%rdi), %ymm1 # sched: [10:2.00] ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_roundps: ; BTVER2: # BB#0: @@ -2217,17 +2217,17 @@ declare <8 x float> @llvm.x86.avx.round.ps.256(<8 x float>, i32) nounwind readno define <8 x float> @test_rsqrtps(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-LABEL: test_rsqrtps: ; SANDY: # BB#0: -; SANDY-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [14:3.00] -; SANDY-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [7:3.00] +; SANDY-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [5:1.00] +; SANDY-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [9:1.00] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: 
test_rsqrtps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrsqrtps (%rdi), %ymm1 # sched: [11:2.00] -; HASWELL-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [11:2.00] +; HASWELL-NEXT: vrsqrtps %ymm0, %ymm0 # sched: [7:2.00] ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_rsqrtps: ; BTVER2: # BB#0: @@ -2254,16 +2254,16 @@ define <4 x double> @test_shufpd(<4 x double> %a0, <4 x double> %a1, <4 x double ; SANDY-LABEL: test_shufpd: ; SANDY: # BB#0: ; SANDY-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] -; SANDY-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [8:1.00] +; SANDY-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [5:1.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_shufpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[0],ymm0[2],ymm1[3] sched: [1:1.00] -; HASWELL-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [1:1.00] +; HASWELL-NEXT: vshufpd {{.*#+}} ymm1 = ymm1[1],mem[0],ymm1[2],mem[3] sched: [5:1.00] ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_shufpd: ; BTVER2: # BB#0: @@ -2289,14 +2289,14 @@ define <8 x float> @test_shufps(<8 x float> %a0, <8 x float> %a1, <8 x float> *% ; SANDY-LABEL: test_shufps: ; SANDY: # BB#0: ; SANDY-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] -; SANDY-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [8:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [5:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_shufps: ; HASWELL: # BB#0: ; 
HASWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4] sched: [1:1.00] -; HASWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,3],mem[0,0],ymm0[4,7],mem[4,4] sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_shufps: ; BTVER2: # BB#0: @@ -2318,17 +2318,17 @@ define <8 x float> @test_shufps(<8 x float> %a0, <8 x float> %a1, <8 x float> *% define <4 x double> @test_sqrtpd(<4 x double> %a0, <4 x double> *%a1) { ; SANDY-LABEL: test_sqrtpd: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [52:3.00] -; SANDY-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [45:3.00] +; SANDY-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [15:1.00] +; SANDY-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [19:1.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_sqrtpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [35:2.00] -; HASWELL-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [35:2.00] +; HASWELL-NEXT: vsqrtpd (%rdi), %ymm1 # sched: [32:2.00] +; HASWELL-NEXT: vsqrtpd %ymm0, %ymm0 # sched: [28:2.00] ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_sqrtpd: ; BTVER2: # BB#0: @@ -2354,17 +2354,17 @@ declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone define <8 x float> @test_sqrtps(<8 x float> %a0, <8 x float> *%a1) { ; SANDY-LABEL: test_sqrtps: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtps (%rdi), %ymm1 # sched: [36:3.00] -; SANDY-NEXT: vsqrtps %ymm0, %ymm0 # sched: [29:3.00] +; SANDY-NEXT: vsqrtps %ymm0, %ymm0 # sched: [15:1.00] +; SANDY-NEXT: vsqrtps (%rdi), %ymm1 # sched: [19:1.00] ; SANDY-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] 
+; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_sqrtps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vsqrtps (%rdi), %ymm1 # sched: [21:2.00] -; HASWELL-NEXT: vsqrtps %ymm0, %ymm0 # sched: [21:2.00] +; HASWELL-NEXT: vsqrtps (%rdi), %ymm1 # sched: [23:2.00] +; HASWELL-NEXT: vsqrtps %ymm0, %ymm0 # sched: [19:2.00] ; HASWELL-NEXT: vaddps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_sqrtps: ; BTVER2: # BB#0: @@ -2391,14 +2391,14 @@ define <4 x double> @test_subpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SANDY-LABEL: test_subpd: ; SANDY: # BB#0: ; SANDY-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_subpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vsubpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vsubpd (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_subpd: ; BTVER2: # BB#0: @@ -2421,14 +2421,14 @@ define <8 x float> @test_subps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SANDY-LABEL: test_subps: ; SANDY: # BB#0: ; SANDY-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_subps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vsubps %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vsubps (%rdi), %ymm0, %ymm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: 
[1:1.00] ; ; BTVER2-LABEL: test_subps: ; BTVER2: # BB#0: @@ -2451,20 +2451,20 @@ define i32 @test_testpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; SANDY-LABEL: test_testpd: ; SANDY: # BB#0: ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33] -; SANDY-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: setb %al # sched: [1:1.00] -; SANDY-NEXT: vtestpd (%rdi), %xmm0 # sched: [7:1.00] +; SANDY-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: setb %al # sched: [1:0.33] +; SANDY-NEXT: vtestpd (%rdi), %xmm0 # sched: [5:0.50] ; SANDY-NEXT: adcl $0, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_testpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] -; HASWELL-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: setb %al # sched: [1:1.00] -; HASWELL-NEXT: vtestpd (%rdi), %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: adcl $0, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vtestpd %xmm1, %xmm0 # sched: [1:0.33] +; HASWELL-NEXT: setb %al # sched: [1:0.50] +; HASWELL-NEXT: vtestpd (%rdi), %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_testpd: ; BTVER2: # BB#0: @@ -2495,22 +2495,22 @@ define i32 @test_testpd_ymm(<4 x double> %a0, <4 x double> %a1, <4 x double> *%a ; SANDY-LABEL: test_testpd_ymm: ; SANDY: # BB#0: ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33] -; SANDY-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: setb %al # sched: [1:1.00] -; SANDY-NEXT: vtestpd (%rdi), %ymm0 # sched: [8:1.00] +; SANDY-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:0.33] +; SANDY-NEXT: setb %al # sched: [1:0.33] +; SANDY-NEXT: vtestpd (%rdi), %ymm0 # sched: [5:0.50] ; SANDY-NEXT: adcl $0, %eax # sched: [1:0.33] ; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; 
; HASWELL-LABEL: test_testpd_ymm: ; HASWELL: # BB#0: ; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] -; HASWELL-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: setb %al # sched: [1:1.00] -; HASWELL-NEXT: vtestpd (%rdi), %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: adcl $0, %eax # sched: [1:0.25] -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vtestpd %ymm1, %ymm0 # sched: [1:0.33] +; HASWELL-NEXT: setb %al # sched: [1:0.50] +; HASWELL-NEXT: vtestpd (%rdi), %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50] +; HASWELL-NEXT: vzeroupper # sched: [1:0.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_testpd_ymm: ; BTVER2: # BB#0: @@ -2542,20 +2542,20 @@ define i32 @test_testps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; SANDY-LABEL: test_testps: ; SANDY: # BB#0: ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33] -; SANDY-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: setb %al # sched: [1:1.00] -; SANDY-NEXT: vtestps (%rdi), %xmm0 # sched: [7:1.00] +; SANDY-NEXT: vtestps %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: setb %al # sched: [1:0.33] +; SANDY-NEXT: vtestps (%rdi), %xmm0 # sched: [5:0.50] ; SANDY-NEXT: adcl $0, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_testps: ; HASWELL: # BB#0: ; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] -; HASWELL-NEXT: vtestps %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: setb %al # sched: [1:1.00] -; HASWELL-NEXT: vtestps (%rdi), %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: adcl $0, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vtestps %xmm1, %xmm0 # sched: [1:0.33] +; HASWELL-NEXT: setb %al # sched: [1:0.50] +; HASWELL-NEXT: vtestps (%rdi), %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_testps: ; BTVER2: # 
BB#0: @@ -2586,22 +2586,22 @@ define i32 @test_testps_ymm(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a2) ; SANDY-LABEL: test_testps_ymm: ; SANDY: # BB#0: ; SANDY-NEXT: xorl %eax, %eax # sched: [1:0.33] -; SANDY-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00] -; SANDY-NEXT: setb %al # sched: [1:1.00] -; SANDY-NEXT: vtestps (%rdi), %ymm0 # sched: [8:1.00] +; SANDY-NEXT: vtestps %ymm1, %ymm0 # sched: [1:0.33] +; SANDY-NEXT: setb %al # sched: [1:0.33] +; SANDY-NEXT: vtestps (%rdi), %ymm0 # sched: [5:0.50] ; SANDY-NEXT: adcl $0, %eax # sched: [1:0.33] ; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_testps_ymm: ; HASWELL: # BB#0: ; HASWELL-NEXT: xorl %eax, %eax # sched: [1:0.25] -; HASWELL-NEXT: vtestps %ymm1, %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: setb %al # sched: [1:1.00] -; HASWELL-NEXT: vtestps (%rdi), %ymm0 # sched: [1:1.00] -; HASWELL-NEXT: adcl $0, %eax # sched: [1:0.25] -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vtestps %ymm1, %ymm0 # sched: [1:0.33] +; HASWELL-NEXT: setb %al # sched: [1:0.50] +; HASWELL-NEXT: vtestps (%rdi), %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: adcl $0, %eax # sched: [2:0.50] +; HASWELL-NEXT: vzeroupper # sched: [1:0.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_testps_ymm: ; BTVER2: # BB#0: @@ -2635,14 +2635,14 @@ define <4 x double> @test_unpckhpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; SANDY-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; SANDY-NEXT: vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [5:1.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_unpckhpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3] sched: [1:1.00] ; HASWELL-NEXT: 
vunpckhpd {{.*#+}} ymm1 = ymm1[1],mem[1],ymm1[3],mem[3] sched: [5:1.00] ; HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_unpckhpd: ; BTVER2: # BB#0: @@ -2669,13 +2669,13 @@ define <8 x float> @test_unpckhps(<8 x float> %a0, <8 x float> %a1, <8 x float> ; SANDY: # BB#0: ; SANDY-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; SANDY-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_unpckhps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7] sched: [1:1.00] ; HASWELL-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7] sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_unpckhps: ; BTVER2: # BB#0: @@ -2698,16 +2698,16 @@ define <4 x double> @test_unpcklpd(<4 x double> %a0, <4 x double> %a1, <4 x doub ; SANDY-LABEL: test_unpcklpd: ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; SANDY-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [8:1.00] +; SANDY-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [5:1.00] ; SANDY-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_unpcklpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2] sched: [1:1.00] -; HASWELL-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [1:1.00] +; HASWELL-NEXT: vunpcklpd {{.*#+}} ymm1 = ymm1[0],mem[0],ymm1[2],mem[2] sched: [5:1.00] ; 
HASWELL-NEXT: vaddpd %ymm1, %ymm0, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_unpcklpd: ; BTVER2: # BB#0: @@ -2733,14 +2733,14 @@ define <8 x float> @test_unpcklps(<8 x float> %a0, <8 x float> %a1, <8 x float> ; SANDY-LABEL: test_unpcklps: ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; SANDY-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [8:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [5:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_unpcklps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5] sched: [1:1.00] -; HASWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5] sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_unpcklps: ; BTVER2: # BB#0: @@ -2765,14 +2765,14 @@ define <4 x double> @test_xorpd(<4 x double> %a0, <4 x double> %a1, <4 x double> ; SANDY-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SANDY-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; SANDY-NEXT: vaddpd %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_xorpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vxorpd %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; HASWELL-NEXT: vxorpd (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: vaddpd 
%ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_xorpd: ; BTVER2: # BB#0: @@ -2804,14 +2804,14 @@ define <8 x float> @test_xorps(<8 x float> %a0, <8 x float> %a1, <8 x float> *%a ; SANDY-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] ; SANDY-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_xorps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: vxorps %ymm1, %ymm0, %ymm0 # sched: [1:1.00] +; HASWELL-NEXT: vxorps (%rdi), %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_xorps: ; BTVER2: # BB#0: @@ -2841,12 +2841,12 @@ define void @test_zeroall() { ; SANDY-LABEL: test_zeroall: ; SANDY: # BB#0: ; SANDY-NEXT: vzeroall # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_zeroall: ; HASWELL: # BB#0: -; HASWELL-NEXT: vzeroall # sched: [16:16.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vzeroall # sched: [1:0.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_zeroall: ; BTVER2: # BB#0: @@ -2866,12 +2866,12 @@ define void @test_zeroupper() { ; SANDY-LABEL: test_zeroupper: ; SANDY: # BB#0: ; SANDY-NEXT: vzeroupper # sched: [?:0.000000e+00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_zeroupper: ; HASWELL: # BB#0: -; HASWELL-NEXT: vzeroupper # sched: [4:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vzeroupper # sched: [1:0.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: 
test_zeroupper: ; BTVER2: # BB#0: diff --git a/test/CodeGen/X86/avx2-intrinsics-x86.ll b/test/CodeGen/X86/avx2-intrinsics-x86.ll index a82c51747a2..52e37dbf269 100644 --- a/test/CodeGen/X86/avx2-intrinsics-x86.ll +++ b/test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -1619,10 +1619,10 @@ define <8 x float> @test_gather_mask(<8 x float> %a0, float* %a, <8 x i32> %idx ; ; AVX512VL-LABEL: test_gather_mask: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08] -; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04] +; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] ; AVX512VL-NEXT: vmovaps %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda] -; AVX512VL-NEXT: vgatherdps %ymm3, (%ecx,%ymm1,4), %ymm0 ## encoding: [0xc4,0xe2,0x65,0x92,0x04,0x89] +; AVX512VL-NEXT: vgatherdps %ymm3, (%eax,%ymm1,4), %ymm0 ## encoding: [0xc4,0xe2,0x65,0x92,0x04,0x88] +; AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08] ; AVX512VL-NEXT: vmovups %ymm2, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x10] ; AVX512VL-NEXT: retl ## encoding: [0xc3] %a_i8 = bitcast float* %a to i8* diff --git a/test/CodeGen/X86/avx2-schedule.ll b/test/CodeGen/X86/avx2-schedule.ll index ca5a132f4f4..042bc217b97 100644 --- a/test/CodeGen/X86/avx2-schedule.ll +++ b/test/CodeGen/X86/avx2-schedule.ll @@ -9,7 +9,7 @@ define <32 x i8> @test_pabsb(<32 x i8> %a0, <32 x i8> *%a1) { ; HASWELL-NEXT: vpabsb %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpabsb (%rdi), %ymm1 # sched: [5:0.50] ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; ZNVER1-LABEL: test_pabsb: ; ZNVER1: # BB#0: @@ -29,9 +29,9 @@ define <8 x i32> @test_pabsd(<8 x i32> %a0, <8 x i32> *%a1) { ; HASWELL-LABEL: test_pabsd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpabsd %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: 
vpabsd (%rdi), %ymm1 # sched: [1:0.50] +; HASWELL-NEXT: vpabsd (%rdi), %ymm1 # sched: [5:0.50] ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; ZNVER1-LABEL: test_pabsd: ; ZNVER1: # BB#0: @@ -51,9 +51,9 @@ define <16 x i16> @test_pabsw(<16 x i16> %a0, <16 x i16> *%a1) { ; HASWELL-LABEL: test_pabsw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpabsw %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [1:0.50] +; HASWELL-NEXT: vpabsw (%rdi), %ymm1 # sched: [5:0.50] ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; ZNVER1-LABEL: test_pabsw: ; ZNVER1: # BB#0: @@ -74,7 +74,7 @@ define <32 x i8> @test_paddb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddb (%rdi), %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; ZNVER1-LABEL: test_paddb: ; ZNVER1: # BB#0: @@ -92,7 +92,7 @@ define <8 x i32> @test_paddd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; ZNVER1-LABEL: test_paddd: ; ZNVER1: # BB#0: @@ -109,8 +109,8 @@ define <4 x i64> @test_paddq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; HASWELL-LABEL: test_paddq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpaddq (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; ZNVER1-LABEL: test_paddq: ; ZNVER1: # BB#0: @@ -128,7 +128,7 @@ define <16 x i16> 
@test_paddw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] ; HASWELL-NEXT: vpaddw (%rdi), %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; ZNVER1-LABEL: test_paddw: ; ZNVER1: # BB#0: @@ -145,9 +145,9 @@ define <4 x i64> @test_pand(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; HASWELL-LABEL: test_pand: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpand %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; HASWELL-NEXT: vpand (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; ZNVER1-LABEL: test_pand: ; ZNVER1: # BB#0: @@ -166,9 +166,9 @@ define <4 x i64> @test_pandn(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; HASWELL-LABEL: test_pandn: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpandn %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [1:0.50] +; HASWELL-NEXT: vpandn (%rdi), %ymm0, %ymm1 # sched: [5:0.50] ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; ZNVER1-LABEL: test_pandn: ; ZNVER1: # BB#0: @@ -190,7 +190,7 @@ define <8 x i32> @test_pmulld(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmulld %ymm1, %ymm0, %ymm0 # sched: [10:2.00] ; HASWELL-NEXT: vpmulld (%rdi), %ymm0, %ymm0 # sched: [10:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; ZNVER1-LABEL: test_pmulld: ; ZNVER1: # BB#0: @@ -207,8 +207,8 @@ define <16 x i16> @test_pmullw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) ; HASWELL-LABEL: test_pmullw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmullw %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 
# sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpmullw (%rdi), %ymm0, %ymm0 # sched: [9:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; ZNVER1-LABEL: test_pmullw: ; ZNVER1: # BB#0: @@ -225,9 +225,9 @@ define <4 x i64> @test_por(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; HASWELL-LABEL: test_por: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; HASWELL-NEXT: vpor (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; ZNVER1-LABEL: test_por: ; ZNVER1: # BB#0: @@ -246,8 +246,8 @@ define <32 x i8> @test_psubb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> *%a2) { ; HASWELL-LABEL: test_psubb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubb %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpsubb (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; ZNVER1-LABEL: test_psubb: ; ZNVER1: # BB#0: @@ -264,8 +264,8 @@ define <8 x i32> @test_psubd(<8 x i32> %a0, <8 x i32> %a1, <8 x i32> *%a2) { ; HASWELL-LABEL: test_psubd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpsubd (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; ZNVER1-LABEL: test_psubd: ; ZNVER1: # BB#0: @@ -282,8 +282,8 @@ define <4 x i64> @test_psubq(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; HASWELL-LABEL: test_psubq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpsubq (%rdi), %ymm0, %ymm0 # 
sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; ZNVER1-LABEL: test_psubq: ; ZNVER1: # BB#0: @@ -300,8 +300,8 @@ define <16 x i16> @test_psubw(<16 x i16> %a0, <16 x i16> %a1, <16 x i16> *%a2) { ; HASWELL-LABEL: test_psubw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpsubw (%rdi), %ymm0, %ymm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; ZNVER1-LABEL: test_psubw: ; ZNVER1: # BB#0: @@ -318,9 +318,9 @@ define <4 x i64> @test_pxor(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> *%a2) { ; HASWELL-LABEL: test_pxor: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpxor %ymm1, %ymm0, %ymm0 # sched: [1:0.33] -; HASWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [1:0.50] +; HASWELL-NEXT: vpxor (%rdi), %ymm0, %ymm0 # sched: [5:0.50] ; HASWELL-NEXT: vpaddq %ymm1, %ymm0, %ymm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; ZNVER1-LABEL: test_pxor: ; ZNVER1: # BB#0: diff --git a/test/CodeGen/X86/avx512-cmp.ll b/test/CodeGen/X86/avx512-cmp.ll index d2e95f692e4..eae7b94f513 100644 --- a/test/CodeGen/X86/avx512-cmp.ll +++ b/test/CodeGen/X86/avx512-cmp.ll @@ -14,7 +14,6 @@ define double @test1(double %a, double %b) nounwind { ; ALL-NEXT: LBB0_2: ## %l2 ; ALL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 ; ALL-NEXT: retq -; ALL-NEXT: ## -- End function %tobool = fcmp une double %a, %b br i1 %tobool, label %l1, label %l2 @@ -37,7 +36,6 @@ define float @test2(float %a, float %b) nounwind { ; ALL-NEXT: LBB1_2: ## %l2 ; ALL-NEXT: vaddss %xmm1, %xmm0, %xmm0 ; ALL-NEXT: retq -; ALL-NEXT: ## -- End function %tobool = fcmp olt float %a, %b br i1 %tobool, label %l1, label %l2 @@ -126,11 +124,11 @@ entry: define i32 @test8(i32 %a1, i32 %a2, i32 %a3) { ; ALL-LABEL: test8: ; ALL: ## BB#0: +; ALL-NEXT: notl %edi ; ALL-NEXT: xorl $-2147483648, %esi ## imm = 0x80000000 ; ALL-NEXT: testl %edx, %edx 
; ALL-NEXT: movl $1, %eax ; ALL-NEXT: cmovel %eax, %edx -; ALL-NEXT: notl %edi ; ALL-NEXT: orl %edi, %esi ; ALL-NEXT: cmovnel %edx, %eax ; ALL-NEXT: retq diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll index 295f98ce61b..140299f5495 100644 --- a/test/CodeGen/X86/avx512-cvt.ll +++ b/test/CodeGen/X86/avx512-cvt.ll @@ -1545,19 +1545,19 @@ define <4 x double> @uitofp_4i1_double(<4 x i32> %a) { } define <2 x float> @uitofp_2i1_float(<2 x i32> %a) { -; KNL-LABEL: uitofp_2i1_float: -; KNL: # BB#0: -; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; KNL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; KNL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; KNL-NEXT: vpextrb $8, %xmm0, %eax -; KNL-NEXT: andl $1, %eax -; KNL-NEXT: vpextrb $0, %xmm0, %ecx -; KNL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0 -; KNL-NEXT: andl $1, %ecx -; KNL-NEXT: vcvtsi2ssl %ecx, %xmm2, %xmm1 -; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] -; KNL-NEXT: retq +; NOVL-LABEL: uitofp_2i1_float: +; NOVL: # BB#0: +; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 +; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] +; NOVL-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 +; NOVL-NEXT: vpextrb $8, %xmm0, %eax +; NOVL-NEXT: andl $1, %eax +; NOVL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm1 +; NOVL-NEXT: vpextrb $0, %xmm0, %eax +; NOVL-NEXT: andl $1, %eax +; NOVL-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0 +; NOVL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] +; NOVL-NEXT: retq ; ; VL-LABEL: uitofp_2i1_float: ; VL: # BB#0: @@ -1567,34 +1567,6 @@ define <2 x float> @uitofp_2i1_float(<2 x i32> %a) { ; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z} ; VL-NEXT: vcvtudq2ps %xmm0, %xmm0 ; VL-NEXT: retq -; -; AVX512DQ-LABEL: uitofp_2i1_float: -; AVX512DQ: # BB#0: -; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512DQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; AVX512DQ-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; AVX512DQ-NEXT: vpextrb $8, %xmm0, %eax -; AVX512DQ-NEXT: 
andl $1, %eax -; AVX512DQ-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm1 -; AVX512DQ-NEXT: vpextrb $0, %xmm0, %eax -; AVX512DQ-NEXT: andl $1, %eax -; AVX512DQ-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0 -; AVX512DQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX512DQ-NEXT: retq -; -; AVX512BW-LABEL: uitofp_2i1_float: -; AVX512BW: # BB#0: -; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX512BW-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] -; AVX512BW-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 -; AVX512BW-NEXT: vpextrb $8, %xmm0, %eax -; AVX512BW-NEXT: andl $1, %eax -; AVX512BW-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm1 -; AVX512BW-NEXT: vpextrb $0, %xmm0, %eax -; AVX512BW-NEXT: andl $1, %eax -; AVX512BW-NEXT: vcvtsi2ssl %eax, %xmm2, %xmm0 -; AVX512BW-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX512BW-NEXT: retq %mask = icmp ult <2 x i32> %a, zeroinitializer %1 = uitofp <2 x i1> %mask to <2 x float> ret <2 x float> %1 diff --git a/test/CodeGen/X86/avx512-insert-extract.ll b/test/CodeGen/X86/avx512-insert-extract.ll index 459bd3dabfc..29a5325a0ae 100644 --- a/test/CodeGen/X86/avx512-insert-extract.ll +++ b/test/CodeGen/X86/avx512-insert-extract.ll @@ -12,7 +12,6 @@ define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind { ; KNL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] ; KNL-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0 ; KNL-NEXT: retq -; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test1: ; SKX: ## BB#0: @@ -22,7 +21,6 @@ define <16 x float> @test1(<16 x float> %x, float* %br, float %y) nounwind { ; SKX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] ; SKX-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0 ; SKX-NEXT: retq -; SKX-NEXT: ## -- End function %rrr = load float, float* %br %rrr2 = insertelement <16 x float> %x, float %rrr, i32 1 %rrr3 = insertelement <16 x float> %rrr2, float %y, i32 14 @@ -38,7 +36,6 @@ define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind { ; KNL-NEXT: vmovsd 
{{.*#+}} xmm0 = xmm1[0],xmm0[1] ; KNL-NEXT: vinsertf32x4 $3, %xmm0, %zmm2, %zmm0 ; KNL-NEXT: retq -; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test2: ; SKX: ## BB#0: @@ -48,7 +45,6 @@ define <8 x double> @test2(<8 x double> %x, double* %br, double %y) nounwind { ; SKX-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SKX-NEXT: vinsertf64x2 $3, %xmm0, %zmm2, %zmm0 ; SKX-NEXT: retq -; SKX-NEXT: ## -- End function %rrr = load double, double* %br %rrr2 = insertelement <8 x double> %x, double %rrr, i32 1 %rrr3 = insertelement <8 x double> %rrr2, double %y, i32 6 @@ -62,7 +58,6 @@ define <16 x float> @test3(<16 x float> %x) nounwind { ; KNL-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3] ; KNL-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0 ; KNL-NEXT: retq -; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test3: ; SKX: ## BB#0: @@ -70,7 +65,6 @@ define <16 x float> @test3(<16 x float> %x) nounwind { ; SKX-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0],xmm1[0],xmm0[2,3] ; SKX-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0 ; SKX-NEXT: retq -; SKX-NEXT: ## -- End function %eee = extractelement <16 x float> %x, i32 4 %rrr2 = insertelement <16 x float> %x, float %eee, i32 1 ret <16 x float> %rrr2 @@ -84,7 +78,6 @@ define <8 x i64> @test4(<8 x i64> %x) nounwind { ; KNL-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1 ; KNL-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0 ; KNL-NEXT: retq -; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test4: ; SKX: ## BB#0: @@ -93,7 +86,6 @@ define <8 x i64> @test4(<8 x i64> %x) nounwind { ; SKX-NEXT: vpinsrq $1, %rax, %xmm0, %xmm1 ; SKX-NEXT: vinserti64x2 $0, %xmm1, %zmm0, %zmm0 ; SKX-NEXT: retq -; SKX-NEXT: ## -- End function %eee = extractelement <8 x i64> %x, i32 4 %rrr2 = insertelement <8 x i64> %x, i64 %eee, i32 1 ret <8 x i64> %rrr2 @@ -104,13 +96,11 @@ define i32 @test5(<4 x float> %x) nounwind { ; KNL: ## BB#0: ; KNL-NEXT: vextractps $3, %xmm0, %eax ; KNL-NEXT: retq -; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test5: ; SKX: ## BB#0: ; SKX-NEXT: vextractps $3, 
%xmm0, %eax ; SKX-NEXT: retq -; SKX-NEXT: ## -- End function %ef = extractelement <4 x float> %x, i32 3 %ei = bitcast float %ef to i32 ret i32 %ei @@ -121,13 +111,11 @@ define void @test6(<4 x float> %x, float* %out) nounwind { ; KNL: ## BB#0: ; KNL-NEXT: vextractps $3, %xmm0, (%rdi) ; KNL-NEXT: retq -; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test6: ; SKX: ## BB#0: ; SKX-NEXT: vextractps $3, %xmm0, (%rdi) ; SKX-NEXT: retq -; SKX-NEXT: ## -- End function %ef = extractelement <4 x float> %x, i32 3 store float %ef, float* %out, align 4 ret void @@ -147,7 +135,6 @@ define float @test7(<16 x float> %x, i32 %ind) nounwind { ; KNL-NEXT: movq %rbp, %rsp ; KNL-NEXT: popq %rbp ; KNL-NEXT: retq -; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test7: ; SKX: ## BB#0: @@ -163,7 +150,6 @@ define float @test7(<16 x float> %x, i32 %ind) nounwind { ; SKX-NEXT: popq %rbp ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq -; SKX-NEXT: ## -- End function %e = extractelement <16 x float> %x, i32 %ind ret float %e } @@ -182,7 +168,6 @@ define double @test8(<8 x double> %x, i32 %ind) nounwind { ; KNL-NEXT: movq %rbp, %rsp ; KNL-NEXT: popq %rbp ; KNL-NEXT: retq -; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test8: ; SKX: ## BB#0: @@ -198,7 +183,6 @@ define double @test8(<8 x double> %x, i32 %ind) nounwind { ; SKX-NEXT: popq %rbp ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq -; SKX-NEXT: ## -- End function %e = extractelement <8 x double> %x, i32 %ind ret double %e } @@ -217,7 +201,6 @@ define float @test9(<8 x float> %x, i32 %ind) nounwind { ; KNL-NEXT: movq %rbp, %rsp ; KNL-NEXT: popq %rbp ; KNL-NEXT: retq -; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test9: ; SKX: ## BB#0: @@ -233,7 +216,6 @@ define float @test9(<8 x float> %x, i32 %ind) nounwind { ; SKX-NEXT: popq %rbp ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq -; SKX-NEXT: ## -- End function %e = extractelement <8 x float> %x, i32 %ind ret float %e } @@ -252,7 +234,6 @@ define i32 @test10(<16 x i32> %x, i32 %ind) nounwind { ; KNL-NEXT: movq %rbp, %rsp 
; KNL-NEXT: popq %rbp ; KNL-NEXT: retq -; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test10: ; SKX: ## BB#0: @@ -268,7 +249,6 @@ define i32 @test10(<16 x i32> %x, i32 %ind) nounwind { ; SKX-NEXT: popq %rbp ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq -; SKX-NEXT: ## -- End function %e = extractelement <16 x i32> %x, i32 %ind ret i32 %e } @@ -1134,137 +1114,137 @@ define i32 @test_insertelement_v32i1(i32 %a, i32 %b, <32 x i32> %x , <32 x i32> ; KNL-NEXT: .cfi_def_cfa_register %rbp ; KNL-NEXT: andq $-32, %rsp ; KNL-NEXT: subq $32, %rsp +; KNL-NEXT: xorl %eax, %eax +; KNL-NEXT: cmpl %esi, %edi +; KNL-NEXT: setb %al ; KNL-NEXT: vpcmpltud %zmm3, %zmm1, %k0 ; KNL-NEXT: kshiftlw $14, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kshiftlw $15, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vmovd %ecx, %xmm1 -; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %edx +; KNL-NEXT: vmovd %edx, %xmm1 +; KNL-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $13, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $12, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $11, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $10, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $9, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; 
KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $8, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $7, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $6, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $5, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $4, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $3, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $2, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftlw $1, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 +; KNL-NEXT: 
kmovw %k0, %ecx +; KNL-NEXT: vpinsrb $15, %ecx, %xmm1, %xmm1 ; KNL-NEXT: vpcmpltud %zmm2, %zmm0, %k0 -; KNL-NEXT: kshiftlw $15, %k0, %k1 -; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kshiftlw $14, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vmovd %eax, %xmm0 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: kshiftlw $15, %k0, %k1 +; KNL-NEXT: kshiftrw $15, %k1, %k1 +; KNL-NEXT: kmovw %k1, %edx +; KNL-NEXT: vmovd %edx, %xmm0 +; KNL-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $13, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $12, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $11, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $10, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $9, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $8, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $7, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $8, 
%eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $6, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $5, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $4, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $3, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $2, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $1, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; KNL-NEXT: xorl %eax, %eax -; KNL-NEXT: cmpl %esi, %edi +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %ecx ; KNL-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0 -; KNL-NEXT: setb %al ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0 ; KNL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 @@ -1319,8 +1299,8 @@ define i8 @test_iinsertelement_v4i1(i32 %a, i32 %b, <4 x i32> %x , <4 x i32> %y) ; KNL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; KNL-NEXT: vpextrb $4, %xmm0, %ecx ; KNL-NEXT: kmovw %ecx, %k1 -; KNL-NEXT: vpextrb $0, %xmm0, %ecx ; KNL-NEXT: vpternlogq $255, %zmm1, %zmm1, 
%zmm1 {%k1} {z} +; KNL-NEXT: vpextrb $0, %xmm0, %ecx ; KNL-NEXT: kmovw %ecx, %k1 ; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] @@ -2144,8 +2124,8 @@ define i16 @test_extractelement_variable_v32i16(<32 x i16> %t1, i32 %index) { define i8 @test_extractelement_variable_v16i8(<16 x i8> %t1, i32 %index) { ; KNL-LABEL: test_extractelement_variable_v16i8: ; KNL: ## BB#0: -; KNL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; KNL-NEXT: ## kill: %EDI %EDI %RDI +; KNL-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; KNL-NEXT: andl $15, %edi ; KNL-NEXT: leaq -{{[0-9]+}}(%rsp), %rax ; KNL-NEXT: movb (%rdi,%rax), %al @@ -2176,8 +2156,8 @@ define i8 @test_extractelement_variable_v32i8(<32 x i8> %t1, i32 %index) { ; KNL-NEXT: .cfi_def_cfa_register %rbp ; KNL-NEXT: andq $-32, %rsp ; KNL-NEXT: subq $64, %rsp -; KNL-NEXT: vmovaps %ymm0, (%rsp) ; KNL-NEXT: ## kill: %EDI %EDI %RDI +; KNL-NEXT: vmovaps %ymm0, (%rsp) ; KNL-NEXT: andl $31, %edi ; KNL-NEXT: movq %rsp, %rax ; KNL-NEXT: movb (%rdi,%rax), %al @@ -2224,9 +2204,9 @@ define i8 @test_extractelement_variable_v64i8(<64 x i8> %t1, i32 %index) { ; KNL-NEXT: .cfi_def_cfa_register %rbp ; KNL-NEXT: andq $-64, %rsp ; KNL-NEXT: subq $128, %rsp +; KNL-NEXT: ## kill: %EDI %EDI %RDI ; KNL-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) ; KNL-NEXT: vmovaps %ymm0, (%rsp) -; KNL-NEXT: ## kill: %EDI %EDI %RDI ; KNL-NEXT: andl $63, %edi ; KNL-NEXT: movq %rsp, %rax ; KNL-NEXT: movb (%rdi,%rax), %al @@ -2315,12 +2295,12 @@ define i8 @test_extractelement_variable_v64i8_indexi8(<64 x i8> %t1, i8 %index) define zeroext i8 @test_extractelement_varible_v2i1(<2 x i64> %a, <2 x i64> %b, i32 %index) { ; KNL-LABEL: test_extractelement_varible_v2i1: ; KNL: ## BB#0: +; KNL-NEXT: ## kill: %EDI %EDI %RDI ; KNL-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; KNL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; KNL-NEXT: vmovdqa 
%xmm0, -{{[0-9]+}}(%rsp) -; KNL-NEXT: ## kill: %EDI %EDI %RDI ; KNL-NEXT: andl $1, %edi ; KNL-NEXT: movl -24(%rsp,%rdi,8), %eax ; KNL-NEXT: andl $1, %eax @@ -2345,12 +2325,12 @@ define zeroext i8 @test_extractelement_varible_v2i1(<2 x i64> %a, <2 x i64> %b, define zeroext i8 @test_extractelement_varible_v4i1(<4 x i32> %a, <4 x i32> %b, i32 %index) { ; KNL-LABEL: test_extractelement_varible_v4i1: ; KNL: ## BB#0: +; KNL-NEXT: ## kill: %EDI %EDI %RDI ; KNL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2 ; KNL-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; KNL-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; KNL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; KNL-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) -; KNL-NEXT: ## kill: %EDI %EDI %RDI ; KNL-NEXT: andl $3, %edi ; KNL-NEXT: movl -24(%rsp,%rdi,4), %eax ; KNL-NEXT: andl $1, %eax diff --git a/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/test/CodeGen/X86/avx512-intrinsics-upgrade.ll index 3c2e6afd225..86902ac926a 100644 --- a/test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ b/test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -2880,6 +2880,7 @@ declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) { ; CHECK-LABEL: test_mask_vextractf32x4: ; CHECK: ## BB#0: +; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm1 ; CHECK-NEXT: kmovw %edi, %k0 ; CHECK-NEXT: kshiftlw $12, %k0, %k1 ; CHECK-NEXT: kshiftrw $15, %k1, %k1 @@ -2897,7 +2898,6 @@ define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 ; CHECK-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 ; CHECK-NEXT: kmovw %k1, %eax ; CHECK-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; CHECK-NEXT: vextractf32x4 $2, %zmm1, %xmm1 ; CHECK-NEXT: vpslld $31, %xmm2, %xmm2 ; CHECK-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: retq @@ -2941,6 +2941,7 @@ declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i32, <4 x i define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) { ; 
CHECK-LABEL: test_maskz_vextracti32x4: ; CHECK: ## BB#0: +; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0 ; CHECK-NEXT: kmovw %edi, %k0 ; CHECK-NEXT: kshiftlw $12, %k0, %k1 ; CHECK-NEXT: kshiftrw $15, %k1, %k1 @@ -2958,7 +2959,6 @@ define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) { ; CHECK-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; CHECK-NEXT: kmovw %k1, %eax ; CHECK-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 -; CHECK-NEXT: vextracti32x4 $2, %zmm0, %xmm0 ; CHECK-NEXT: vpslld $31, %xmm1, %xmm1 ; CHECK-NEXT: vpsrad $31, %xmm1, %xmm1 ; CHECK-NEXT: vpand %xmm0, %xmm1, %xmm0 diff --git a/test/CodeGen/X86/avx512-mask-op.ll b/test/CodeGen/X86/avx512-mask-op.ll index 0ee86d5fb45..e1a92c60d18 100644 --- a/test/CodeGen/X86/avx512-mask-op.ll +++ b/test/CodeGen/X86/avx512-mask-op.ll @@ -1837,73 +1837,9 @@ define void @ktest_2(<32 x float> %in, float * %base) { ; KNL-NEXT: .cfi_def_cfa_register %rbp ; KNL-NEXT: andq $-32, %rsp ; KNL-NEXT: subq $32, %rsp -; KNL-NEXT: vmovups 64(%rdi), %zmm2 -; KNL-NEXT: vcmpltps %zmm1, %zmm2, %k2 -; KNL-NEXT: kshiftlw $14, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: kshiftlw $15, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: vmovd %ecx, %xmm2 -; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 -; KNL-NEXT: kshiftlw $13, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 -; KNL-NEXT: kshiftlw $12, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 -; KNL-NEXT: kshiftlw $11, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 -; KNL-NEXT: kshiftlw $10, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 -; KNL-NEXT: kshiftlw $9, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; 
KNL-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 -; KNL-NEXT: kshiftlw $8, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 -; KNL-NEXT: kshiftlw $7, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 -; KNL-NEXT: kshiftlw $6, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 -; KNL-NEXT: kshiftlw $5, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 -; KNL-NEXT: kshiftlw $4, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 -; KNL-NEXT: kshiftlw $3, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 -; KNL-NEXT: kshiftlw $2, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 -; KNL-NEXT: kshiftlw $1, %k2, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 -; KNL-NEXT: kshiftrw $15, %k2, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; KNL-NEXT: vmovups (%rdi), %zmm3 -; KNL-NEXT: vcmpltps %zmm0, %zmm3, %k1 +; KNL-NEXT: vmovups (%rdi), %zmm2 +; KNL-NEXT: vmovups 64(%rdi), %zmm3 +; KNL-NEXT: vcmpltps %zmm1, %zmm3, %k1 ; KNL-NEXT: kshiftlw $14, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax @@ -1967,74 +1903,74 @@ define void @ktest_2(<32 x float> %in, float * %base) { ; KNL-NEXT: kshiftrw $15, %k1, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 -; KNL-NEXT: vmovups 68(%rdi), %zmm4 {%k2} {z} -; KNL-NEXT: vcmpltps %zmm4, %zmm1, %k0 -; KNL-NEXT: kshiftlw $14, %k0, %k2 -; KNL-NEXT: kshiftrw $15, %k2, %k2 -; KNL-NEXT: kmovw %k2, %eax -; KNL-NEXT: kshiftlw $15, %k0, 
%k2 -; KNL-NEXT: kshiftrw $15, %k2, %k2 -; KNL-NEXT: kmovw %k2, %ecx -; KNL-NEXT: vmovd %ecx, %xmm4 -; KNL-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $13, %k0, %k2 -; KNL-NEXT: kshiftrw $15, %k2, %k2 -; KNL-NEXT: kmovw %k2, %eax -; KNL-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $12, %k0, %k2 -; KNL-NEXT: kshiftrw $15, %k2, %k2 -; KNL-NEXT: kmovw %k2, %eax -; KNL-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $11, %k0, %k2 -; KNL-NEXT: kshiftrw $15, %k2, %k2 -; KNL-NEXT: kmovw %k2, %eax -; KNL-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $10, %k0, %k2 -; KNL-NEXT: kshiftrw $15, %k2, %k2 -; KNL-NEXT: kmovw %k2, %eax -; KNL-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $9, %k0, %k2 -; KNL-NEXT: kshiftrw $15, %k2, %k2 -; KNL-NEXT: kmovw %k2, %eax -; KNL-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $8, %k0, %k2 -; KNL-NEXT: kshiftrw $15, %k2, %k2 -; KNL-NEXT: kmovw %k2, %eax -; KNL-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $7, %k0, %k2 -; KNL-NEXT: kshiftrw $15, %k2, %k2 -; KNL-NEXT: kmovw %k2, %eax -; KNL-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $6, %k0, %k2 -; KNL-NEXT: kshiftrw $15, %k2, %k2 -; KNL-NEXT: kmovw %k2, %eax -; KNL-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $5, %k0, %k2 -; KNL-NEXT: kshiftrw $15, %k2, %k2 -; KNL-NEXT: kmovw %k2, %eax -; KNL-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $4, %k0, %k2 -; KNL-NEXT: kshiftrw $15, %k2, %k2 -; KNL-NEXT: kmovw %k2, %eax -; KNL-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $3, %k0, %k2 -; KNL-NEXT: kshiftrw $15, %k2, %k2 -; KNL-NEXT: kmovw %k2, %eax -; KNL-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $2, %k0, %k2 -; KNL-NEXT: kshiftrw $15, %k2, %k2 -; KNL-NEXT: kmovw %k2, %eax -; KNL-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 -; KNL-NEXT: kshiftlw $1, %k0, %k2 -; KNL-NEXT: kshiftrw $15, %k2, %k2 -; KNL-NEXT: kmovw %k2, %eax -; KNL-NEXT: 
vpinsrb $14, %eax, %xmm4, %xmm4 -; KNL-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2 +; KNL-NEXT: vcmpltps %zmm0, %zmm2, %k2 +; KNL-NEXT: kshiftlw $14, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: kshiftlw $15, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: vmovd %ecx, %xmm2 +; KNL-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 +; KNL-NEXT: kshiftlw $13, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 +; KNL-NEXT: kshiftlw $12, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 +; KNL-NEXT: kshiftlw $11, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 +; KNL-NEXT: kshiftlw $10, %k2, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm3 -; KNL-NEXT: vmovups 4(%rdi), %zmm4 {%k1} {z} -; KNL-NEXT: vcmpltps %zmm4, %zmm0, %k0 +; KNL-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 +; KNL-NEXT: kshiftlw $9, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 +; KNL-NEXT: kshiftlw $8, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; KNL-NEXT: kshiftlw $7, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 +; KNL-NEXT: kshiftlw $6, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; KNL-NEXT: kshiftlw $5, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; KNL-NEXT: kshiftlw $4, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; KNL-NEXT: kshiftlw $3, 
%k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; KNL-NEXT: kshiftlw $2, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 +; KNL-NEXT: kshiftlw $1, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; KNL-NEXT: kshiftrw $15, %k2, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 +; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2 +; KNL-NEXT: vmovups 4(%rdi), %zmm3 {%k2} {z} +; KNL-NEXT: vmovups 68(%rdi), %zmm4 {%k1} {z} +; KNL-NEXT: vcmpltps %zmm4, %zmm1, %k0 ; KNL-NEXT: kshiftlw $14, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax @@ -2098,7 +2034,71 @@ define void @ktest_2(<32 x float> %in, float * %base) { ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4 -; KNL-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3 +; KNL-NEXT: vcmpltps %zmm3, %zmm0, %k0 +; KNL-NEXT: kshiftlw $14, %k0, %k1 +; KNL-NEXT: kshiftrw $15, %k1, %k1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kshiftlw $15, %k0, %k1 +; KNL-NEXT: kshiftrw $15, %k1, %k1 +; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: vmovd %ecx, %xmm3 +; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $13, %k0, %k1 +; KNL-NEXT: kshiftrw $15, %k1, %k1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $12, %k0, %k1 +; KNL-NEXT: kshiftrw $15, %k1, %k1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $11, %k0, %k1 +; KNL-NEXT: kshiftrw $15, %k1, %k1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $10, %k0, %k1 +; KNL-NEXT: kshiftrw $15, %k1, %k1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $9, %k0, %k1 +; KNL-NEXT: kshiftrw $15, %k1, 
%k1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $8, %k0, %k1 +; KNL-NEXT: kshiftrw $15, %k1, %k1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $7, %k0, %k1 +; KNL-NEXT: kshiftrw $15, %k1, %k1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $6, %k0, %k1 +; KNL-NEXT: kshiftrw $15, %k1, %k1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $5, %k0, %k1 +; KNL-NEXT: kshiftrw $15, %k1, %k1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $4, %k0, %k1 +; KNL-NEXT: kshiftrw $15, %k1, %k1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $3, %k0, %k1 +; KNL-NEXT: kshiftrw $15, %k1, %k1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $2, %k0, %k1 +; KNL-NEXT: kshiftrw $15, %k1, %k1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftlw $1, %k0, %k1 +; KNL-NEXT: kshiftrw $15, %k1, %k1 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 +; KNL-NEXT: vinserti128 $1, %xmm4, %ymm3, %ymm3 ; KNL-NEXT: vpor %ymm3, %ymm2, %ymm2 ; KNL-NEXT: vextracti128 $1, %ymm2, %xmm3 ; KNL-NEXT: vpmovsxbd %xmm3, %zmm3 @@ -2943,6 +2943,36 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) { ; ; KNL-LABEL: store_64i1: ; KNL: ## BB#0: +; KNL-NEXT: pushq %rbp +; KNL-NEXT: Lcfi9: +; KNL-NEXT: .cfi_def_cfa_offset 16 +; KNL-NEXT: pushq %r15 +; KNL-NEXT: Lcfi10: +; KNL-NEXT: .cfi_def_cfa_offset 24 +; KNL-NEXT: pushq %r14 +; KNL-NEXT: Lcfi11: +; KNL-NEXT: .cfi_def_cfa_offset 32 +; KNL-NEXT: pushq %r13 +; KNL-NEXT: Lcfi12: +; KNL-NEXT: .cfi_def_cfa_offset 40 +; KNL-NEXT: pushq %r12 +; KNL-NEXT: Lcfi13: +; 
KNL-NEXT: .cfi_def_cfa_offset 48 +; KNL-NEXT: pushq %rbx +; KNL-NEXT: Lcfi14: +; KNL-NEXT: .cfi_def_cfa_offset 56 +; KNL-NEXT: Lcfi15: +; KNL-NEXT: .cfi_offset %rbx, -56 +; KNL-NEXT: Lcfi16: +; KNL-NEXT: .cfi_offset %r12, -48 +; KNL-NEXT: Lcfi17: +; KNL-NEXT: .cfi_offset %r13, -40 +; KNL-NEXT: Lcfi18: +; KNL-NEXT: .cfi_offset %r14, -32 +; KNL-NEXT: Lcfi19: +; KNL-NEXT: .cfi_offset %r15, -24 +; KNL-NEXT: Lcfi20: +; KNL-NEXT: .cfi_offset %rbp, -16 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 @@ -2954,275 +2984,281 @@ define void @store_64i1(<64 x i1>* %a, <64 x i1> %v) { ; KNL-NEXT: vptestmd %zmm3, %zmm3, %k0 ; KNL-NEXT: kshiftlw $14, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kmovw %k1, %r8d ; KNL-NEXT: kshiftlw $15, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: kmovw %k1, %r9d ; KNL-NEXT: kshiftlw $13, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vmovd %ecx, %xmm3 -; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: kmovw %k1, %r10d ; KNL-NEXT: kshiftlw $12, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kmovw %k1, %r11d ; KNL-NEXT: kshiftlw $11, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $2, %ecx, %xmm3, %xmm3 -; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: kmovw %k1, %r14d ; KNL-NEXT: kshiftlw $10, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kmovw %k1, %r15d ; KNL-NEXT: kshiftlw $9, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $4, %ecx, %xmm3, %xmm3 -; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: kmovw %k1, %r12d ; KNL-NEXT: kshiftlw $8, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kmovw %k1, %r13d ; KNL-NEXT: kshiftlw $7, %k0, %k1 ; 
KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $6, %ecx, %xmm3, %xmm3 -; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: kmovw %k1, %ebx ; KNL-NEXT: kshiftlw $6, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kmovw %k1, %ebp ; KNL-NEXT: kshiftlw $5, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $8, %ecx, %xmm3, %xmm3 -; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kshiftlw $4, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kshiftlw $3, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $10, %ecx, %xmm3, %xmm3 -; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: kmovw %k1, %edx ; KNL-NEXT: kshiftlw $2, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kmovw %k1, %esi ; KNL-NEXT: kshiftlw $1, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $12, %ecx, %xmm3, %xmm3 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vptestmd %zmm2, %zmm2, %k1 +; KNL-NEXT: vmovd %r9d, %xmm3 +; KNL-NEXT: kmovw %k1, %r9d +; KNL-NEXT: vptestmd %zmm2, %zmm2, %k2 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: vpinsrb $1, %r8d, %xmm3, %xmm2 +; KNL-NEXT: vpinsrb $2, %r10d, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $3, %r11d, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $4, %r14d, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $5, %r15d, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $6, %r12d, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $7, %r13d, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $8, %ebx, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $9, %ebp, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $11, %ecx, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $12, %edx, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $13, %esi, %xmm2, %xmm2 +; KNL-NEXT: vpinsrb $14, %r9d, %xmm2, %xmm2 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $15, %eax, 
%xmm2, %xmm2 +; KNL-NEXT: vpmovsxbd %xmm2, %zmm2 +; KNL-NEXT: vpslld $31, %zmm2, %zmm2 +; KNL-NEXT: vptestmd %zmm2, %zmm2, %k0 +; KNL-NEXT: kmovw %k0, 6(%rdi) +; KNL-NEXT: kshiftlw $14, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %r8d +; KNL-NEXT: kshiftlw $15, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %r10d +; KNL-NEXT: kshiftlw $13, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %r9d +; KNL-NEXT: kshiftlw $12, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %r11d +; KNL-NEXT: kshiftlw $11, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %r14d +; KNL-NEXT: kshiftlw $10, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %r15d +; KNL-NEXT: kshiftlw $9, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %r12d +; KNL-NEXT: kshiftlw $8, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %r13d +; KNL-NEXT: kshiftlw $7, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: kshiftlw $6, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %esi +; KNL-NEXT: kshiftlw $5, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %ebp +; KNL-NEXT: kshiftlw $4, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %ebx +; KNL-NEXT: kshiftlw $3, %k2, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vpinsrb $13, %eax, %xmm3, %xmm2 ; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: kshiftlw $2, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: kmovw %k0, %edx +; KNL-NEXT: kshiftlw $1, %k2, %k0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: vmovd %r10d, %xmm2 +; KNL-NEXT: kmovw %k0, %r10d +; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 +; KNL-NEXT: kshiftrw $15, %k2, %k0 +; KNL-NEXT: vpinsrb $1, %r8d, %xmm2, %xmm1 +; KNL-NEXT: vpinsrb $2, %r9d, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $3, %r11d, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $4, %r14d, %xmm1, 
%xmm1 +; KNL-NEXT: vpinsrb $5, %r15d, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $6, %r12d, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $7, %r13d, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $9, %esi, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $10, %ebp, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $11, %ebx, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $13, %edx, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $14, %r10d, %xmm1, %xmm1 +; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 +; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 +; KNL-NEXT: vpslld $31, %zmm1, %zmm1 +; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL-NEXT: kmovw %k0, 4(%rdi) ; KNL-NEXT: kshiftlw $14, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vpinsrb $14, %ecx, %xmm2, %xmm2 -; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: kmovw %k0, %r8d ; KNL-NEXT: kshiftlw $15, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 -; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: kmovw %k0, %r10d ; KNL-NEXT: kshiftlw $13, %k1, %k0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vmovd %eax, %xmm3 -; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: kmovw %k0, %r9d ; KNL-NEXT: kshiftlw $12, %k1, %k0 -; KNL-NEXT: vpinsrb $1, %ecx, %xmm3, %xmm3 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: kmovw %k0, %r11d ; KNL-NEXT: kshiftlw $11, %k1, %k0 -; KNL-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: kmovw %k0, %r14d ; KNL-NEXT: kshiftlw $10, %k1, %k0 -; KNL-NEXT: vpinsrb $3, %ecx, %xmm3, %xmm3 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: kmovw %k0, %r15d ; KNL-NEXT: kshiftlw $9, %k1, %k0 -; KNL-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: kmovw %k0, %r12d ; KNL-NEXT: kshiftlw $8, %k1, %k0 -; KNL-NEXT: vpinsrb $5, %ecx, %xmm3, %xmm3 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: 
kmovw %k0, %ecx +; KNL-NEXT: kmovw %k0, %r13d ; KNL-NEXT: kshiftlw $7, %k1, %k0 -; KNL-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: kmovw %k0, %ecx ; KNL-NEXT: kshiftlw $6, %k1, %k0 -; KNL-NEXT: vpinsrb $7, %ecx, %xmm3, %xmm3 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: kmovw %k0, %esi ; KNL-NEXT: kshiftlw $5, %k1, %k0 -; KNL-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax +; KNL-NEXT: kmovw %k0, %ebp ; KNL-NEXT: kshiftlw $4, %k1, %k0 -; KNL-NEXT: vpinsrb $9, %ecx, %xmm3, %xmm3 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: kmovw %k0, %ebx ; KNL-NEXT: kshiftlw $3, %k1, %k0 -; KNL-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: kshiftlw $2, %k1, %k0 -; KNL-NEXT: vpinsrb $11, %ecx, %xmm3, %xmm3 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ecx +; KNL-NEXT: kmovw %k0, %edx ; KNL-NEXT: kshiftlw $1, %k1, %k0 -; KNL-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 ; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL-NEXT: vmovd %r10d, %xmm1 +; KNL-NEXT: kmovw %k0, %r10d +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpmovsxbd %xmm2, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vpinsrb $13, %ecx, %xmm3, %xmm2 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL-NEXT: vpinsrb $14, %eax, %xmm2, %xmm1 -; KNL-NEXT: vpinsrb $15, %ecx, %xmm1, %xmm1 -; KNL-NEXT: kmovw %k1, 6(%rdi) -; KNL-NEXT: vpmovsxbd %xmm1, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k1 -; KNL-NEXT: kmovw %k1, 4(%rdi) +; KNL-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm0 +; KNL-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 
+; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $13, %edx, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $14, %r10d, %xmm0, %xmm0 +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 +; KNL-NEXT: vpslld $31, %zmm0, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 +; KNL-NEXT: kmovw %k1, 2(%rdi) ; KNL-NEXT: kshiftlw $14, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kmovw %k1, %r8d ; KNL-NEXT: kshiftlw $15, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: kmovw %k1, %r9d ; KNL-NEXT: kshiftlw $13, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vmovd %ecx, %xmm1 -; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: kmovw %k1, %r10d ; KNL-NEXT: kshiftlw $12, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kmovw %k1, %r11d ; KNL-NEXT: kshiftlw $11, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 -; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: kmovw %k1, %r14d ; KNL-NEXT: kshiftlw $10, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kmovw %k1, %r15d ; KNL-NEXT: kshiftlw $9, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1 -; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: kmovw %k1, %r12d ; KNL-NEXT: kshiftlw $8, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kmovw %k1, %r13d ; KNL-NEXT: kshiftlw $7, %k0, %k1 ; KNL-NEXT: 
kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1 -; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: kmovw %k1, %edx ; KNL-NEXT: kshiftlw $6, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kmovw %k1, %esi ; KNL-NEXT: kshiftlw $5, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $8, %ecx, %xmm1, %xmm1 -; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: kmovw %k1, %ebp ; KNL-NEXT: kshiftlw $4, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kmovw %k1, %ebx ; KNL-NEXT: kshiftlw $3, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1 -; KNL-NEXT: kmovw %k1, %ecx +; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kshiftlw $2, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 -; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: kmovw %k1, %ecx ; KNL-NEXT: kshiftlw $1, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 -; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm0 -; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 -; KNL-NEXT: vpslld $31, %zmm0, %zmm0 -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 -; KNL-NEXT: kmovw %k0, 2(%rdi) -; KNL-NEXT: kshiftlw $14, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: kshiftlw $15, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: kshiftlw $13, %k1, %k0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: vmovd %ecx, %xmm0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: kshiftlw $12, %k1, %k0 -; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; 
KNL-NEXT: kshiftlw $11, %k1, %k0 -; KNL-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: kshiftlw $10, %k1, %k0 -; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: kshiftlw $9, %k1, %k0 -; KNL-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: kshiftlw $8, %k1, %k0 -; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: kshiftlw $7, %k1, %k0 -; KNL-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: kshiftlw $6, %k1, %k0 -; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: kshiftlw $5, %k1, %k0 -; KNL-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: kshiftlw $4, %k1, %k0 -; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: kshiftlw $3, %k1, %k0 -; KNL-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: kshiftlw $2, %k1, %k0 -; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %eax -; KNL-NEXT: kshiftlw $1, %k1, %k0 -; KNL-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 -; KNL-NEXT: kshiftrw $15, %k0, %k0 -; KNL-NEXT: kmovw %k0, %ecx -; KNL-NEXT: kshiftrw $15, %k1, %k0 -; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 -; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 +; KNL-NEXT: vmovd %r9d, %xmm0 +; KNL-NEXT: kmovw %k1, %r9d +; KNL-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $2, %r10d, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 +; 
KNL-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $8, %edx, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $9, %esi, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 +; KNL-NEXT: kshiftrw $15, %k0, %k0 +; KNL-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 +; KNL-NEXT: vpinsrb $14, %r9d, %xmm0, %xmm0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 ; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kmovw %k0, (%rdi) +; KNL-NEXT: popq %rbx +; KNL-NEXT: popq %r12 +; KNL-NEXT: popq %r13 +; KNL-NEXT: popq %r14 +; KNL-NEXT: popq %r15 +; KNL-NEXT: popq %rbp ; KNL-NEXT: retq ; ; SKX-LABEL: store_64i1: diff --git a/test/CodeGen/X86/avx512-vec-cmp.ll b/test/CodeGen/X86/avx512-vec-cmp.ll index 02ee3adeb7a..2b04b9229b3 100644 --- a/test/CodeGen/X86/avx512-vec-cmp.ll +++ b/test/CodeGen/X86/avx512-vec-cmp.ll @@ -8,7 +8,6 @@ define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind { ; CHECK-NEXT: vcmpleps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function %mask = fcmp ole <16 x float> %x, %y %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y ret <16 x float> %max @@ -20,7 +19,6 @@ define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind { ; CHECK-NEXT: vcmplepd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function %mask = fcmp ole <8 x double> %x, %y %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y ret <8 x double> %max @@ -32,7 +30,6 @@ define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwin ; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k1 ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function %y = 
load <16 x i32>, <16 x i32>* %yp, align 4 %mask = icmp eq <16 x i32> %x, %y %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1 @@ -45,7 +42,6 @@ define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) ; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function %mask = icmp uge <16 x i32> %x, %y %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y ret <16 x i32> %max @@ -57,7 +53,6 @@ define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind { ; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function %mask = icmp eq <8 x i64> %x, %y %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y ret <8 x i64> %max @@ -69,7 +64,6 @@ define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) noun ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function %mask = icmp ugt <8 x i64> %x, %y %max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y ret <8 x i64> %max @@ -123,14 +117,12 @@ define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind { ; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 ; KNL-NEXT: retq -; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test9: ; SKX: ## BB#0: ; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k1 ; SKX-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq -; SKX-NEXT: ## -- End function %mask = icmp eq <8 x i32> %x, %y %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y ret <8 x i32> %max @@ -145,14 +137,12 @@ define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind { ; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 ; KNL-NEXT: retq -; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test10: ; SKX: ## BB#0: ; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ; 
SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq -; SKX-NEXT: ## -- End function %mask = fcmp oeq <8 x float> %x, %y %max = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y @@ -164,7 +154,6 @@ define <8 x i32> @test11_unsigned(<8 x i32> %x, <8 x i32> %y) nounwind { ; CHECK: ## BB#0: ; CHECK-NEXT: vpmaxud %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function %mask = icmp ugt <8 x i32> %x, %y %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y ret <8 x i32> %max @@ -179,7 +168,6 @@ define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind { ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: ## kill: %AX %AX %EAX ; KNL-NEXT: retq -; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test12: ; SKX: ## BB#0: @@ -190,7 +178,6 @@ define i16 @test12(<16 x i64> %a, <16 x i64> %b) nounwind { ; SKX-NEXT: ## kill: %AX %AX %EAX ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq -; SKX-NEXT: ## -- End function %res = icmp eq <16 x i64> %a, %b %res1 = bitcast <16 x i1> %res to i16 ret i16 %res1 @@ -343,7 +330,6 @@ define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind { ; KNL-NEXT: movq %rbp, %rsp ; KNL-NEXT: popq %rbp ; KNL-NEXT: retq -; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test12_v32i32: ; SKX: ## BB#0: @@ -353,7 +339,6 @@ define i32 @test12_v32i32(<32 x i32> %a, <32 x i32> %b) nounwind { ; SKX-NEXT: kmovd %k0, %eax ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq -; SKX-NEXT: ## -- End function %res = icmp eq <32 x i32> %a, %b %res1 = bitcast <32 x i1> %res to i32 ret i32 %res1 @@ -577,72 +562,72 @@ define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind { ; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; KNL-NEXT: vpmovsxbd %xmm0, %zmm0 ; KNL-NEXT: vpslld $31, %zmm0, %zmm0 -; KNL-NEXT: vpcmpeqw %ymm6, %ymm2, %ymm1 -; KNL-NEXT: vpmovsxwd %ymm1, %zmm1 -; KNL-NEXT: vpslld $31, %zmm1, %zmm1 -; KNL-NEXT: vptestmd %zmm1, %zmm1, %k0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 +; KNL-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) +; KNL-NEXT: vpcmpeqw %ymm6, %ymm2, %ymm0 +; 
KNL-NEXT: vpmovsxwd %ymm0, %zmm0 +; KNL-NEXT: vpslld $31, %zmm0, %zmm0 +; KNL-NEXT: vptestmd %zmm0, %zmm0, %k0 ; KNL-NEXT: kshiftlw $14, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax ; KNL-NEXT: kshiftlw $15, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vmovd %ecx, %xmm1 -; KNL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 +; KNL-NEXT: vmovd %ecx, %xmm0 +; KNL-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $13, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $12, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $11, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $10, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $9, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $8, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $7, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $6, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $5, %k0, %k1 ; KNL-NEXT: kshiftrw 
$15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $4, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $3, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax -; KNL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 +; KNL-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $2, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 ; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftlw $1, %k0, %k1 ; KNL-NEXT: kshiftrw $15, %k1, %k1 -; KNL-NEXT: kmovw %k1, %ecx -; KNL-NEXT: vptestmd %zmm0, %zmm0, %k1 -; KNL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm0 -; KNL-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 -; KNL-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) +; KNL-NEXT: kmovw %k1, %eax +; KNL-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ; KNL-NEXT: kshiftrw $15, %k0, %k0 ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 @@ -657,7 +642,6 @@ define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind { ; KNL-NEXT: movq %rbp, %rsp ; KNL-NEXT: popq %rbp ; KNL-NEXT: retq -; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test12_v64i16: ; SKX: ## BB#0: @@ -667,7 +651,6 @@ define i64 @test12_v64i16(<64 x i16> %a, <64 x i16> %b) nounwind { ; SKX-NEXT: kmovq %k0, %rax ; SKX-NEXT: vzeroupper ; SKX-NEXT: retq -; SKX-NEXT: ## -- End function %res = icmp eq <64 x i16> %a, %b %res1 = bitcast <64 x i1> %res to i64 ret i64 %res1 @@ -721,7 +704,6 @@ define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind ; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k1 ; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function %mask = icmp sge <16 x i32> %x, %y %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y ret <16 x i32> %max @@ -733,7 +715,6 @@ define <16 x 
i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou ; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k1 ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 %mask = icmp sgt <16 x i32> %x, %y %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1 @@ -746,7 +727,6 @@ define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou ; CHECK-NEXT: vpcmpled (%rdi), %zmm0, %k1 ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 %mask = icmp sle <16 x i32> %x, %y %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1 @@ -759,7 +739,6 @@ define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou ; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 %mask = icmp ule <16 x i32> %x, %y %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %x1 @@ -773,7 +752,6 @@ define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i3 ; CHECK-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1} ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function %mask1 = icmp eq <16 x i32> %x1, %y1 %mask0 = icmp eq <16 x i32> %x, %y %mask = select <16 x i1> %mask0, <16 x i1> %mask1, <16 x i1> zeroinitializer @@ -788,7 +766,6 @@ define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y ; CHECK-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1} ; CHECK-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1} ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function %mask1 = icmp sge <8 x i64> %x1, %y1 %mask0 = icmp sle <8 x i64> %x, %y %mask = select <8 x i1> %mask0, <8 x i1> %mask1, <8 x i1> zeroinitializer @@ -803,7 +780,6 @@ define <8 x i64> @test22(<8 x i64> %x, 
<8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i6 ; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1} ; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function %mask1 = icmp sgt <8 x i64> %x1, %y1 %y = load <8 x i64>, <8 x i64>* %y.ptr, align 4 %mask0 = icmp sgt <8 x i64> %x, %y @@ -819,7 +795,6 @@ define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16 ; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1} ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function %mask1 = icmp sge <16 x i32> %x1, %y1 %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4 %mask0 = icmp ule <16 x i32> %x, %y @@ -834,7 +809,6 @@ define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind { ; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1 ; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function %yb = load i64, i64* %yb.ptr, align 4 %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0 %y = shufflevector <8 x i64> %y.0, <8 x i64> undef, <8 x i32> zeroinitializer @@ -849,7 +823,6 @@ define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind ; CHECK-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1 ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function %yb = load i32, i32* %yb.ptr, align 4 %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0 %y = shufflevector <16 x i32> %y.0, <16 x i32> undef, <16 x i32> zeroinitializer @@ -865,7 +838,6 @@ define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32 ; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} ; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function %mask1 = icmp sge <16 x i32> %x1, %y1 %yb = load i32, i32* %yb.ptr, align 4 %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0 @@ -883,7 +855,6 @@ define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 
x i64> %x1, <8 x i64> %y ; CHECK-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1} ; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function %mask1 = icmp sge <8 x i64> %x1, %y1 %yb = load i64, i64* %yb.ptr, align 4 %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0 @@ -949,14 +920,12 @@ define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind { ; KNL-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm2 ; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 ; KNL-NEXT: retq -; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test30: ; SKX: ## BB#0: ; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq -; SKX-NEXT: ## -- End function %mask = fcmp oeq <4 x double> %x, %y %max = select <4 x i1> %mask, <4 x double> %x, <4 x double> %y @@ -969,14 +938,12 @@ define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp ; KNL-NEXT: vcmpltpd (%rdi), %xmm0, %xmm2 ; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; KNL-NEXT: retq -; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test31: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltpd (%rdi), %xmm0, %k1 ; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ; SKX-NEXT: retq -; SKX-NEXT: ## -- End function %y = load <2 x double>, <2 x double>* %yp, align 4 %mask = fcmp olt <2 x double> %x, %y @@ -990,14 +957,12 @@ define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp ; KNL-NEXT: vcmpltpd (%rdi), %ymm0, %ymm2 ; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 ; KNL-NEXT: retq -; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test32: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltpd (%rdi), %ymm0, %k1 ; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq -; SKX-NEXT: ## -- End function %y = load <4 x double>, <4 x double>* %yp, align 4 %mask = fcmp ogt <4 x double> %y, %x @@ -1011,7 +976,6 @@ define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp ; CHECK-NEXT: vcmpltpd (%rdi), %zmm0, %k1 ; CHECK-NEXT: 
vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function %y = load <8 x double>, <8 x double>* %yp, align 4 %mask = fcmp olt <8 x double> %x, %y %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %x1 @@ -1024,14 +988,12 @@ define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) no ; KNL-NEXT: vcmpltps (%rdi), %xmm0, %xmm2 ; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; KNL-NEXT: retq -; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test34: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltps (%rdi), %xmm0, %k1 ; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ; SKX-NEXT: retq -; SKX-NEXT: ## -- End function %y = load <4 x float>, <4 x float>* %yp, align 4 %mask = fcmp olt <4 x float> %x, %y %max = select <4 x i1> %mask, <4 x float> %x, <4 x float> %x1 @@ -1048,14 +1010,12 @@ define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) no ; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 ; KNL-NEXT: retq -; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test35: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltps (%rdi), %ymm0, %k1 ; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq -; SKX-NEXT: ## -- End function %y = load <8 x float>, <8 x float>* %yp, align 4 %mask = fcmp ogt <8 x float> %y, %x @@ -1069,7 +1029,6 @@ define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp ; CHECK-NEXT: vcmpltps (%rdi), %zmm0, %k1 ; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function %y = load <16 x float>, <16 x float>* %yp, align 4 %mask = fcmp olt <16 x float> %x, %y %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %x1 @@ -1082,7 +1041,6 @@ define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) nou ; CHECK-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function %a = load double, double* %ptr %v 
= insertelement <8 x double> undef, double %a, i32 0 @@ -1100,14 +1058,12 @@ define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nou ; KNL-NEXT: vcmpltpd %ymm2, %ymm0, %ymm2 ; KNL-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0 ; KNL-NEXT: retq -; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test38: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltpd (%rdi){1to4}, %ymm0, %k1 ; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq -; SKX-NEXT: ## -- End function %a = load double, double* %ptr %v = insertelement <4 x double> undef, double %a, i32 0 @@ -1125,14 +1081,12 @@ define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nou ; KNL-NEXT: vcmpltpd %xmm2, %xmm0, %xmm2 ; KNL-NEXT: vblendvpd %xmm2, %xmm0, %xmm1, %xmm0 ; KNL-NEXT: retq -; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test39: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltpd (%rdi){1to2}, %xmm0, %k1 ; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1} ; SKX-NEXT: retq -; SKX-NEXT: ## -- End function %a = load double, double* %ptr %v = insertelement <2 x double> undef, double %a, i32 0 @@ -1150,7 +1104,6 @@ define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, float* %ptr) n ; CHECK-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 ; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; CHECK-NEXT: retq -; CHECK-NEXT: ## -- End function %a = load float, float* %ptr %v = insertelement <16 x float> undef, float %a, i32 0 @@ -1171,14 +1124,12 @@ define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, float* %ptr) noun ; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ; KNL-NEXT: ## kill: %YMM0 %YMM0 %ZMM0 ; KNL-NEXT: retq -; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test41: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltps (%rdi){1to8}, %ymm0, %k1 ; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1} ; SKX-NEXT: retq -; SKX-NEXT: ## -- End function %a = load float, float* %ptr %v = insertelement <8 x float> undef, float %a, i32 0 @@ -1196,14 +1147,12 @@ define <4 x float> @test42(<4 x float> %x, <4 x float> 
%x1, float* %ptr) noun ; KNL-NEXT: vcmpltps %xmm2, %xmm0, %xmm2 ; KNL-NEXT: vblendvps %xmm2, %xmm0, %xmm1, %xmm0 ; KNL-NEXT: retq -; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test42: ; SKX: ## BB#0: ; SKX-NEXT: vcmpltps (%rdi){1to4}, %xmm0, %k1 ; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1} ; SKX-NEXT: retq -; SKX-NEXT: ## -- End function %a = load float, float* %ptr %v = insertelement <4 x float> undef, float %a, i32 0 @@ -1223,7 +1172,6 @@ define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x ; KNL-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1} ; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; KNL-NEXT: retq -; KNL-NEXT: ## -- End function ; ; SKX-LABEL: test43: ; SKX: ## BB#0: @@ -1232,7 +1180,6 @@ define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x ; SKX-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1} ; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ; SKX-NEXT: retq -; SKX-NEXT: ## -- End function %a = load double, double* %ptr %v = insertelement <8 x double> undef, double %a, i32 0 diff --git a/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll index faf90a16d30..c7db4ded181 100644 --- a/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll +++ b/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll @@ -1695,6 +1695,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: .cfi_offset %esi, -12 ; AVX512F-32-NEXT: .Lcfi14: ; AVX512F-32-NEXT: .cfi_offset %ebx, -8 +; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm6 +; AVX512F-32-NEXT: vmovdqa64 %zmm0, %zmm5 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: shrb $5, %al @@ -1715,39 +1717,39 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2 ; AVX512F-32-NEXT: kmovd %ecx, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = 
[255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3 ; AVX512F-32-NEXT: vpbroadcastw %xmm3, %xmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm2, %ymm3, %ymm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 ; AVX512F-32-NEXT: kmovd %edx, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 ; AVX512F-32-NEXT: vpslld $24, %xmm3, %xmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm2, %ymm3, %ymm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 ; 
AVX512F-32-NEXT: kmovd %ebx, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 ; AVX512F-32-NEXT: vpbroadcastd %xmm3, %xmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm2, %ymm3, %ymm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 ; AVX512F-32-NEXT: kmovd %eax, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 ; AVX512F-32-NEXT: vpsllq $40, %xmm3, %xmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm2, %ymm3, %ymm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7] ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: shrb $6, %al @@ -1756,8 +1758,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = 
[255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %cl, %al @@ -1766,8 +1768,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %ch, %al @@ -1775,8 +1777,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: andb $2, %al @@ -1785,8 +1787,8 @@ 
define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6] ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %ch, %dl @@ -1797,8 +1799,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: shrb $3, %al @@ -1806,8 +1808,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4] ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: 
vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax @@ -1817,8 +1819,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax @@ -1828,8 +1830,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2] ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = 
[255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax @@ -1839,8 +1841,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax @@ -1850,8 +1852,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0] ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; 
AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax @@ -1860,8 +1862,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %dl @@ -1872,8 +1874,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %bl @@ -1885,8 +1887,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: 
vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: shrb $3, %dl @@ -1895,8 +1897,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: vpslld $24, %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %dl @@ -1906,8 +1908,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = 
[255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %dl @@ -1918,8 +1920,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: vpsllq $40, %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %dl @@ -1929,8 +1931,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: # 
kill: %AL %AL %EAX %EAX @@ -1940,8 +1942,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax @@ -1950,8 +1952,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpbroadcastq %xmm2, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %dl @@ -1960,444 +1962,444 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: kmovd %edx, %k1 ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6] -; AVX512F-32-NEXT: 
vinserti128 $1, %xmm2, %ymm0, %ymm3 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm3 = zmm3[0,1,2,3],zmm4[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm3, %k0 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %dl ; AVX512F-32-NEXT: andb $15, %dl ; AVX512F-32-NEXT: movb %dl, %al ; AVX512F-32-NEXT: shrb $2, %dl ; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3 -; AVX512F-32-NEXT: vpbroadcastw %xmm3, %xmm3 -; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm4 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm5 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm5, %ymm4, %ymm4 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm4 = zmm4[0,1,2,3],zmm5[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm4, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vshufi64x2 
{{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: shrb $3, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm4 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4] -; AVX512F-32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm5 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm6 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm6, %ymm5, %ymm5 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm5 = zmm5[0,1,2,3],zmm6[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm5, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4] +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm3, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm4[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: shrl $28, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm5 -; AVX512F-32-NEXT: vpbroadcastd %xmm5, %xmm5 -; AVX512F-32-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm6 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm7 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm7, %ymm6, %ymm6 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm6 = zmm6[0,1,2,3],zmm7[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm6, %k0 +; AVX512F-32-NEXT: vpmovm2b 
%k1, %zmm2 +; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm4[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: movl %ecx, %esi ; AVX512F-32-NEXT: shrl $29, %eax ; AVX512F-32-NEXT: andb $1, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm6 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm6 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm6[0,1,2] -; AVX512F-32-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm7 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm6 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm6, %ymm2, %ymm7, %ymm7 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm7[0,1,2,3],zmm2[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2] +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm2 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm2[0,1,2,3],zmm0[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movl %esi, %eax ; AVX512F-32-NEXT: shrl $30, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: 
vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255] +; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movl %esi, %eax ; AVX512F-32-NEXT: shrl $31, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0] -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] +; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpmovm2b %k0, 
%zmm1 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm1, %ymm0, %ymm0 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; AVX512F-32-NEXT: kmovd %ecx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm3 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm3, %ymm4, %ymm3 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm7 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm1, %ymm7, %ymm1 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: andb $2, %al ; AVX512F-32-NEXT: shrb %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: 
vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpsllw $8, %xmm0, %xmm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %cl, %dl ; AVX512F-32-NEXT: andb $15, %dl ; AVX512F-32-NEXT: movb %dl, %al ; AVX512F-32-NEXT: shrb $2, %dl ; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: shrb $3, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslld $24, %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = 
[255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpslld $24, %xmm0, %xmm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: shrb $4, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpbroadcastd %xmm0, %xmm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: shrb $5, %al ; AVX512F-32-NEXT: andb $1, %al ; 
AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpsllq $40, %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpsllq $40, %xmm0, %xmm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: shrb $6, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; 
AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: shrb $7, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpsllq $56, %xmm0, %xmm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %ch, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastq %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpbroadcastq %xmm0, %xmm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, 
%ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: andb $2, %al ; AVX512F-32-NEXT: shrb %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %ch, %dl ; AVX512F-32-NEXT: andb $15, %dl ; AVX512F-32-NEXT: movb %dl, %al ; AVX512F-32-NEXT: shrb $2, %dl ; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = 
[255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: shrb $3, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4] +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; 
AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: andl $61440, %eax # imm = 0xF000 ; AVX512F-32-NEXT: shrl $12, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpbroadcastd %xmm0, %xmm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: shrl $13, %eax ; AVX512F-32-NEXT: andb $1, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = 
zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2] +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: andl $49152, %eax # imm = 0xC000 ; AVX512F-32-NEXT: shrl $14, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: andl $32768, %eax # imm = 0x8000 ; AVX512F-32-NEXT: shrl $15, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0] -; AVX512F-32-NEXT: 
vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movl %ecx, %ebx ; AVX512F-32-NEXT: shrl $16, %ebx ; AVX512F-32-NEXT: kmovd %ebx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, 
%ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %bl, %dl ; AVX512F-32-NEXT: andb $2, %dl ; AVX512F-32-NEXT: shrb %dl ; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpsllw $8, %xmm0, %xmm0 +; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: movb %bl, %al ; AVX512F-32-NEXT: andb $15, %al ; AVX512F-32-NEXT: movb %al, %dl ; AVX512F-32-NEXT: shrb $2, %al ; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, 
%zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 +; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: shrb $3, %dl ; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslld $24, %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpslld $24, %xmm0, %xmm0 +; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %bl, %al ; AVX512F-32-NEXT: shrb $4, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: 
vpbroadcastd %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpbroadcastd %xmm0, %xmm0 +; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %bl, %al ; AVX512F-32-NEXT: shrb $5, %al ; AVX512F-32-NEXT: andb $1, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpsllq $40, %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpsllq $40, %xmm0, %xmm0 +; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: 
vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %bl, %al ; AVX512F-32-NEXT: shrb $6, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: # kill: %BL %BL %EBX %EBX ; AVX512F-32-NEXT: shrb $7, %bl ; AVX512F-32-NEXT: kmovd %ebx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = 
[255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpsllq $56, %xmm0, %xmm0 +; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: shrl $24, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastq %xmm2, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpbroadcastq %xmm0, %ymm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %al, %dl ; 
AVX512F-32-NEXT: andb $2, %dl ; AVX512F-32-NEXT: shrb %dl ; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6] -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] +; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: movb %al, %dl ; AVX512F-32-NEXT: andb $15, %dl ; AVX512F-32-NEXT: movb %dl, %al ; AVX512F-32-NEXT: shrb $2, %dl ; AVX512F-32-NEXT: kmovd %edx, %k0 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1 +; AVX512F-32-NEXT: vpmovm2b %k0, 
%zmm0 +; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 ; AVX512F-32-NEXT: shrb $3, %al -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 -; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4] -; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm3, %ymm3 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $28, %eax -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm3 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 ; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 -; AVX512F-32-NEXT: vpbroadcastd %xmm4, %xmm4 -; AVX512F-32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm3, %ymm4, %ymm3 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vpslldq 
{{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4] +; AVX512F-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm4 +; AVX512F-32-NEXT: vpblendvb %ymm3, %ymm4, %ymm1, %ymm1 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: shrl $29, %eax ; AVX512F-32-NEXT: andb $1, %al ; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm3 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2] -; AVX512F-32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4 -; AVX512F-32-NEXT: vpblendvb %ymm6, %ymm3, %ymm4, %ymm3 +; AVX512F-32-NEXT: movl %ecx, %eax +; AVX512F-32-NEXT: shrl $28, %eax +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512F-32-NEXT: kmovd %eax, %k1 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3 +; AVX512F-32-NEXT: vpbroadcastd %xmm3, %xmm3 +; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm1, %ymm3, %ymm1 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2] +; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = 
[255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm1, %ymm3, %ymm1 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: shrl $30, %eax -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm3 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 -; AVX512F-32-NEXT: vpbroadcastw %xmm4, %xmm4 -; AVX512F-32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4 -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm3, %ymm4, %ymm3 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vpbroadcastw %xmm3, %xmm3 +; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 +; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm1, %ymm3, %ymm1 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: shrl $31, %eax ; AVX512F-32-NEXT: kshiftlq $1, %k0, %k0 @@ -2405,12 +2407,12 @@ define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) { ; AVX512F-32-NEXT: kmovd %eax, %k1 ; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 ; AVX512F-32-NEXT: korq %k1, %k0, %k1 -; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} -; AVX512F-32-NEXT: vpcmpgtb %zmm0, %zmm1, %k2 {%k1} -; AVX512F-32-NEXT: vpcmpleb %zmm1, %zmm0, %k3 {%k1} -; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k4 {%k1} -; AVX512F-32-NEXT: vpcmpleb %zmm0, %zmm1, %k5 {%k1} -; AVX512F-32-NEXT: vpcmpgtb %zmm1, %zmm0, %k1 {%k1} +; AVX512F-32-NEXT: vpcmpeqb %zmm6, %zmm5, %k0 {%k1} +; AVX512F-32-NEXT: vpcmpgtb %zmm5, %zmm6, %k2 
{%k1} +; AVX512F-32-NEXT: vpcmpleb %zmm6, %zmm5, %k3 {%k1} +; AVX512F-32-NEXT: vpcmpneqb %zmm6, %zmm5, %k4 {%k1} +; AVX512F-32-NEXT: vpcmpleb %zmm5, %zmm6, %k5 {%k1} +; AVX512F-32-NEXT: vpcmpgtb %zmm6, %zmm5, %k1 {%k1} ; AVX512F-32-NEXT: kmovq %k0, (%esp) ; AVX512F-32-NEXT: movl (%esp), %eax ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx @@ -2587,6 +2589,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: .cfi_offset %esi, -12 ; AVX512F-32-NEXT: .Lcfi24: ; AVX512F-32-NEXT: .cfi_offset %ebx, -8 +; AVX512F-32-NEXT: vmovdqa64 %zmm1, %zmm6 +; AVX512F-32-NEXT: vmovdqa64 %zmm0, %zmm5 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: shrb $5, %al @@ -2607,39 +2611,39 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2 ; AVX512F-32-NEXT: kmovd %ecx, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3 ; AVX512F-32-NEXT: vpbroadcastw %xmm3, %xmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm2, %ymm3, %ymm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = 
[255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 ; AVX512F-32-NEXT: kmovd %edx, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 ; AVX512F-32-NEXT: vpslld $24, %xmm3, %xmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm2, %ymm3, %ymm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 ; AVX512F-32-NEXT: kmovd %ebx, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 ; AVX512F-32-NEXT: vpbroadcastd %xmm3, %xmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm2, %ymm3, %ymm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 ; AVX512F-32-NEXT: kmovd %eax, %k0 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 ; AVX512F-32-NEXT: vpsllq $40, %xmm3, %xmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = 
[255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm2, %ymm3, %ymm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm2, %ymm3, %ymm3 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm3[0,1,2,3],zmm2[4,5,6,7] ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: shrb $6, %al @@ -2648,8 +2652,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %cl, %al @@ -2658,8 +2662,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = 
[255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %ch, %al @@ -2667,8 +2671,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpbroadcastq %xmm2, %xmm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: andb $2, %al @@ -2677,8 +2681,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6] ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: 
vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %ch, %dl @@ -2689,8 +2693,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: shrb $3, %al @@ -2698,8 +2702,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4] ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax @@ -2709,8 +2713,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512F-32-NEXT: vpmovm2b 
%k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax @@ -2720,8 +2724,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2] ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax @@ -2731,8 +2735,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: 
vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax @@ -2742,8 +2746,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0] ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax @@ -2752,8 +2756,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 
{{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %dl @@ -2764,8 +2768,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %bl @@ -2777,8 +2781,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: shrb $3, %dl @@ -2787,8 +2791,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: vpslld $24, %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, 
%ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %dl @@ -2798,8 +2802,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %dl @@ -2810,8 +2814,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: vpsllq $40, %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa 
{{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %dl @@ -2821,8 +2825,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: # kill: %AL %AL %EAX %EAX @@ -2832,8 +2836,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2 ; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: 
vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax @@ -2842,8 +2846,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpbroadcastq %xmm2, %ymm2 ; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 ; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] ; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %dl @@ -2852,444 +2856,444 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: kmovd %edx, %k1 ; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 ; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6] -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm3 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm3 = zmm3[0,1,2,3],zmm4[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm3, %k0 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = 
zmm2[0,1,2,3],zmm3[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movb %al, %dl ; AVX512F-32-NEXT: andb $15, %dl ; AVX512F-32-NEXT: movb %dl, %al ; AVX512F-32-NEXT: shrb $2, %dl ; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3 -; AVX512F-32-NEXT: vpbroadcastw %xmm3, %xmm3 -; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm4 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm5 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm5, %ymm4, %ymm4 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm4 = zmm4[0,1,2,3],zmm5[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm4, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm3, %ymm2, %ymm2 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: shrb $3, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm4 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2,3,4] -; AVX512F-32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm5 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm6 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm6, %ymm5, %ymm5 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm5 = zmm5[0,1,2,3],zmm6[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm5, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; 
AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4] +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm3, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm4[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: shrl $28, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm5 -; AVX512F-32-NEXT: vpbroadcastd %xmm5, %xmm5 -; AVX512F-32-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm6 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm7 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm7, %ymm6, %ymm6 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm6 = zmm6[0,1,2,3],zmm7[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm6, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm0 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm0, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm4[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: movl %ecx, %esi ; AVX512F-32-NEXT: shrl $29, %eax ; AVX512F-32-NEXT: andb $1, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm6 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm6 = 
zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm6[0,1,2] -; AVX512F-32-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm7 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm6 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm6, %ymm2, %ymm7, %ymm7 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm7[0,1,2,3],zmm2[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2] +; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm2 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm2[0,1,2,3],zmm0[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movl %esi, %eax ; AVX512F-32-NEXT: shrl $30, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255] -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm2 = 
[255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255] +; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movl %esi, %eax ; AVX512F-32-NEXT: shrl $31, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0] -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0] -; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2 -; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7] -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] +; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm1, %ymm0, %ymm0 +; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7] +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx ; AVX512F-32-NEXT: kmovd %ecx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm3 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = 
[0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm3, %ymm4, %ymm3 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm7 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm1, %ymm7, %ymm1 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: andb $2, %al ; AVX512F-32-NEXT: shrb %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpsllw $8, %xmm0, %xmm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %cl, %dl ; AVX512F-32-NEXT: andb $15, %dl ; AVX512F-32-NEXT: movb %dl, %al ; AVX512F-32-NEXT: shrb $2, %dl 
; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: shrb $3, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslld $24, %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpslld $24, %xmm0, %xmm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, 
%ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: shrb $4, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpbroadcastd %xmm0, %xmm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: shrb $5, %al ; AVX512F-32-NEXT: andb $1, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpsllq $40, %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpsllq $40, %xmm0, %xmm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; 
AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: shrb $6, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %cl, %al ; AVX512F-32-NEXT: shrb $7, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; 
AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpsllq $56, %xmm0, %xmm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %ch, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastq %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpbroadcastq %xmm0, %xmm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: andb $2, %al ; AVX512F-32-NEXT: shrb %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, 
%ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %ch, %dl ; AVX512F-32-NEXT: andb $15, %dl ; AVX512F-32-NEXT: movb %dl, %al ; AVX512F-32-NEXT: shrb $2, %dl ; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: 
vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: shrb $3, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4] +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: andl $61440, %eax # imm = 0xF000 ; AVX512F-32-NEXT: shrl $12, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; 
AVX512F-32-NEXT: vpbroadcastd %xmm0, %xmm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: shrl $13, %eax ; AVX512F-32-NEXT: andb $1, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2] +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: andl $49152, %eax # imm = 0xC000 ; AVX512F-32-NEXT: shrl $14, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; 
AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: andl $32768, %eax # imm = 0x8000 ; AVX512F-32-NEXT: shrl $15, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0] -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0] +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = 
[255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movl %ecx, %ebx ; AVX512F-32-NEXT: shrl $16, %ebx ; AVX512F-32-NEXT: kmovd %ebx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %bl, %dl ; AVX512F-32-NEXT: andb $2, %dl ; AVX512F-32-NEXT: shrb %dl ; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 
-; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpsllw $8, %xmm0, %xmm0 +; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: movb %bl, %al ; AVX512F-32-NEXT: andb $15, %al ; AVX512F-32-NEXT: movb %al, %dl ; AVX512F-32-NEXT: shrb $2, %al ; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 +; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: shrb $3, %dl ; AVX512F-32-NEXT: kmovd 
%edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslld $24, %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpslld $24, %xmm0, %xmm0 +; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %bl, %al ; AVX512F-32-NEXT: shrb $4, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpbroadcastd %xmm0, %xmm0 +; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: 
vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %bl, %al ; AVX512F-32-NEXT: shrb $5, %al ; AVX512F-32-NEXT: andb $1, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpsllq $40, %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpsllq $40, %xmm0, %xmm0 +; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %bl, %al ; AVX512F-32-NEXT: shrb $6, %al ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = 
[255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: # kill: %BL %BL %EBX %EBX ; AVX512F-32-NEXT: shrb $7, %bl ; AVX512F-32-NEXT: kmovd %ebx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpsllq $56, %xmm0, %xmm0 +; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: 
vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: shrl $24, %eax ; AVX512F-32-NEXT: kmovd %eax, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpbroadcastq %xmm2, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpbroadcastq %xmm0, %ymm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movb %al, %dl ; AVX512F-32-NEXT: andb $2, %dl ; AVX512F-32-NEXT: shrb %dl ; AVX512F-32-NEXT: kmovd %edx, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6] -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm0 = 
zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6] +; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 ; AVX512F-32-NEXT: movb %al, %dl ; AVX512F-32-NEXT: andb $15, %dl ; AVX512F-32-NEXT: movb %dl, %al ; AVX512F-32-NEXT: shrb $2, %dl ; AVX512F-32-NEXT: kmovd %edx, %k0 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k1 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 -; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2 -; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm3, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 +; AVX512F-32-NEXT: vpbroadcastw %xmm0, %xmm0 +; AVX512F-32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm1, %ymm4 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm4, %ymm0, %ymm0 ; AVX512F-32-NEXT: shrb $3, %al -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 -; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm3 = 
zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4] -; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm3, %ymm3 -; AVX512F-32-NEXT: movl %ecx, %eax -; AVX512F-32-NEXT: shrl $28, %eax -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm3 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 ; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 -; AVX512F-32-NEXT: vpbroadcastd %xmm4, %xmm4 -; AVX512F-32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4 -; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] -; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm3, %ymm4, %ymm3 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm1 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4] +; AVX512F-32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm4 +; AVX512F-32-NEXT: vpblendvb %ymm3, %ymm4, %ymm1, %ymm1 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: shrl $29, %eax ; AVX512F-32-NEXT: andb $1, %al ; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k1 -; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm3 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4 -; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm4 = 
zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2] -; AVX512F-32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4 -; AVX512F-32-NEXT: vpblendvb %ymm6, %ymm3, %ymm4, %ymm3 +; AVX512F-32-NEXT: movl %ecx, %eax +; AVX512F-32-NEXT: shrl $28, %eax +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512F-32-NEXT: kmovd %eax, %k1 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3 +; AVX512F-32-NEXT: vpbroadcastd %xmm3, %xmm3 +; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm1, %ymm3, %ymm1 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k1 +; AVX512F-32-NEXT: vpmovm2b %k1, %zmm0 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2] +; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 +; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255] +; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm1, %ymm3, %ymm1 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: shrl $30, %eax -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 -; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2 -; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm3 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm0 +; AVX512F-32-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512F-32-NEXT: kmovd %eax, %k0 -; AVX512F-32-NEXT: vpmovm2b 
%k0, %zmm4 -; AVX512F-32-NEXT: vpbroadcastw %xmm4, %xmm4 -; AVX512F-32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4 -; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm3, %ymm4, %ymm3 -; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2 -; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0 +; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3 +; AVX512F-32-NEXT: vpbroadcastw %xmm3, %xmm3 +; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3 +; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm1, %ymm3, %ymm1 +; AVX512F-32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512F-32-NEXT: vpmovb2m %zmm0, %k0 ; AVX512F-32-NEXT: movl %ecx, %eax ; AVX512F-32-NEXT: shrl $31, %eax ; AVX512F-32-NEXT: kshiftlq $1, %k0, %k0 @@ -3297,12 +3301,12 @@ define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %m ; AVX512F-32-NEXT: kmovd %eax, %k1 ; AVX512F-32-NEXT: kshiftlq $63, %k1, %k1 ; AVX512F-32-NEXT: korq %k1, %k0, %k1 -; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} -; AVX512F-32-NEXT: vpcmpltub %zmm1, %zmm0, %k2 {%k1} -; AVX512F-32-NEXT: vpcmpleub %zmm1, %zmm0, %k3 {%k1} -; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k4 {%k1} -; AVX512F-32-NEXT: vpcmpnltub %zmm1, %zmm0, %k5 {%k1} -; AVX512F-32-NEXT: vpcmpnleub %zmm1, %zmm0, %k1 {%k1} +; AVX512F-32-NEXT: vpcmpeqb %zmm6, %zmm5, %k0 {%k1} +; AVX512F-32-NEXT: vpcmpltub %zmm6, %zmm5, %k2 {%k1} +; AVX512F-32-NEXT: vpcmpleub %zmm6, %zmm5, %k3 {%k1} +; AVX512F-32-NEXT: vpcmpneqb %zmm6, %zmm5, %k4 {%k1} +; AVX512F-32-NEXT: vpcmpnltub %zmm6, %zmm5, %k5 {%k1} +; AVX512F-32-NEXT: vpcmpnleub %zmm6, %zmm5, %k1 {%k1} ; AVX512F-32-NEXT: kmovq %k0, (%esp) ; AVX512F-32-NEXT: movl (%esp), %eax ; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx diff --git a/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll b/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll index 9f344c82f6b..f4504ed07fc 100644 --- a/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll +++ b/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll @@ -2695,32 +2695,32 @@ declare <32 x i8> 
@llvm.x86.avx512.mask.packuswb.256(<16 x i16>, <16 x i16>, <32 define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) { ; CHECK-LABEL: test_cmp_b_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 ## encoding: [0x62,0xf3,0x75,0x28,0x3f,0xc0,0x02] -; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] -; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1] -; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] -; CHECK-NEXT: vmovd %eax, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0] -; CHECK-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd1,0x01] -; CHECK-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd2,0x02] -; CHECK-NEXT: kxnord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x46,0xc0] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: vpinsrd $3, %eax, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd0,0x03] ; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: kmovd %k0, %r8d ## encoding: [0xc5,0x7b,0x93,0xc0] ; CHECK-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 ## encoding: [0x62,0xf1,0x75,0x28,0x64,0xc0] ; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] ; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02] ; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] -; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] -; CHECK-NEXT: vmovd %eax, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc8] -; CHECK-NEXT: vpunpckldq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x62,0xc0] -; CHECK-NEXT: ## xmm0 = 
xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; CHECK-NEXT: vmovd %edx, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca] -; CHECK-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6c,0xc1] -; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0] -; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc2,0x01] +; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04] +; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0] +; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 ## encoding: [0x62,0xf3,0x75,0x28,0x3f,0xc0,0x02] +; CHECK-NEXT: kmovd %k0, %edi ## encoding: [0xc5,0xfb,0x93,0xf8] +; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] +; CHECK-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01] +; CHECK-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x02] +; CHECK-NEXT: kxnord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x46,0xc0] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03] +; CHECK-NEXT: vmovd %ecx, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9] +; CHECK-NEXT: vmovd %r8d, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0] +; CHECK-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9] +; CHECK-NEXT: ## xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; CHECK-NEXT: vmovd %edx, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2] +; CHECK-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca] +; CHECK-NEXT: ## xmm1 = xmm1[0],xmm2[0] +; CHECK-NEXT: vinserti128 $1, %xmm0, 
%ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1) %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 @@ -2750,23 +2750,23 @@ define <8 x i32> @test_mask_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) { ; CHECK-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x64,0xc0] ; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8] ; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02] -; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] -; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04] +; CHECK-NEXT: kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0] +; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0] ; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0] -; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x3f,0xc0,0x02] +; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04] ; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1] +; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x3f,0xc0,0x02] ; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] -; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] -; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01] -; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02] +; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1] +; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] +; CHECK-NEXT: vmovd %eax, %xmm0 ## EVEX TO 
VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] +; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x01] +; CHECK-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02] ; CHECK-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03] ; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8] ; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01] -; CHECK-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xca,0x02] -; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xc8,0x03] +; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02] +; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03] ; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask) @@ -2793,32 +2793,32 @@ declare i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8>, <32 x i8>, i32, i32) noun define <8 x i32> @test_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1) { ; CHECK-LABEL: test_ucmp_b_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x05] -; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] -; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x06] -; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] -; CHECK-NEXT: vmovd %eax, %xmm2 
## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0] -; CHECK-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd1,0x01] -; CHECK-NEXT: vpinsrd $2, %edx, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd2,0x02] -; CHECK-NEXT: kxnord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x46,0xc0] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: vpinsrd $3, %eax, %xmm2, %xmm2 ## encoding: [0xc4,0xe3,0x69,0x22,0xd0,0x03] ; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: kmovd %k0, %r8d ## encoding: [0xc5,0x7b,0x93,0xc0] ; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01] ; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] ; CHECK-NEXT: vpcmpleub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02] ; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] -; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] -; CHECK-NEXT: vmovd %eax, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc8] -; CHECK-NEXT: vpunpckldq %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x62,0xc0] -; CHECK-NEXT: ## xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; CHECK-NEXT: vmovd %edx, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca] -; CHECK-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6c,0xc1] -; CHECK-NEXT: ## xmm0 = xmm0[0],xmm1[0] -; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xc2,0x01] +; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04] +; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0] +; CHECK-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x05] +; CHECK-NEXT: kmovd %k0, %edi ## 
encoding: [0xc5,0xfb,0x93,0xf8] +; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x06] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] +; CHECK-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01] +; CHECK-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x02] +; CHECK-NEXT: kxnord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x46,0xc0] +; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] +; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03] +; CHECK-NEXT: vmovd %ecx, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9] +; CHECK-NEXT: vmovd %r8d, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0] +; CHECK-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9] +; CHECK-NEXT: ## xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; CHECK-NEXT: vmovd %edx, %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2] +; CHECK-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca] +; CHECK-NEXT: ## xmm1 = xmm1[0],xmm2[0] +; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1) %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0 @@ -2848,23 +2848,23 @@ define <8 x i32> @test_mask_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) ; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01] ; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8] ; CHECK-NEXT: vpcmpleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02] -; CHECK-NEXT: kmovd %k0, %edx ## 
encoding: [0xc5,0xfb,0x93,0xd0] -; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04] +; CHECK-NEXT: kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0] +; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0] ; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0] -; CHECK-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05] +; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04] ; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06] +; CHECK-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05] ; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8] -; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] -; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01] -; CHECK-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02] +; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06] +; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0] +; CHECK-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] +; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x01] +; CHECK-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02] ; CHECK-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03] ; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8] ; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01] -; CHECK-NEXT: vpinsrd $2, %edx, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xca,0x02] -; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: 
[0xc4,0xe1,0xfd,0x47,0xc0] -; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0] -; CHECK-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xc8,0x03] +; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02] +; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03] ; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01] ; CHECK-NEXT: retq ## encoding: [0xc3] %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask) diff --git a/test/CodeGen/X86/bitcast-and-setcc-256.ll b/test/CodeGen/X86/bitcast-and-setcc-256.ll index 21b06915182..a6d6ca15530 100644 --- a/test/CodeGen/X86/bitcast-and-setcc-256.ll +++ b/test/CodeGen/X86/bitcast-and-setcc-256.ll @@ -453,10 +453,10 @@ define i32 @v32i8(<32 x i8> %a, <32 x i8> %b, <32 x i8> %c, <32 x i8> %d) { ; SSE2-SSSE3-NEXT: pcmpgtb %xmm2, %xmm0 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm3, %xmm1 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm6, %xmm4 +; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4 ; SSE2-SSSE3-NEXT: pcmpgtb %xmm7, %xmm5 ; SSE2-SSSE3-NEXT: pand %xmm1, %xmm5 ; SSE2-SSSE3-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp) -; SSE2-SSSE3-NEXT: pand %xmm0, %xmm4 ; SSE2-SSSE3-NEXT: movdqa %xmm4, -{{[0-9]+}}(%rsp) ; SSE2-SSSE3-NEXT: movb -{{[0-9]+}}(%rsp), %al ; SSE2-SSSE3-NEXT: andb $1, %al diff --git a/test/CodeGen/X86/extractelement-legalization-store-ordering.ll b/test/CodeGen/X86/extractelement-legalization-store-ordering.ll index 2ebd30adcdc..5d5cbc76f92 100644 --- a/test/CodeGen/X86/extractelement-legalization-store-ordering.ll +++ b/test/CodeGen/X86/extractelement-legalization-store-ordering.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple i386-apple-darwin -mcpu=yonah | FileCheck %s target datalayout = "e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" @@ -7,32 +6,31 @@ target datalayout = 
"e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" ; into loads, off the stack or a previous store. ; Be very explicit about the ordering/stack offsets. +; CHECK-LABEL: test_extractelement_legalization_storereuse: +; CHECK: # BB#0 +; CHECK-NEXT: pushl %ebx +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi +; CHECK-NEXT: movl 16(%esp), %eax +; CHECK-NEXT: movl 24(%esp), %ecx +; CHECK-NEXT: movl 20(%esp), %edx +; CHECK-NEXT: paddd (%edx), %xmm0 +; CHECK-NEXT: movdqa %xmm0, (%edx) +; CHECK-NEXT: movl (%edx), %esi +; CHECK-NEXT: movl 4(%edx), %edi +; CHECK-NEXT: shll $4, %ecx +; CHECK-NEXT: movl 8(%edx), %ebx +; CHECK-NEXT: movl 12(%edx), %edx +; CHECK-NEXT: movl %esi, 12(%eax,%ecx) +; CHECK-NEXT: movl %edi, (%eax,%ecx) +; CHECK-NEXT: movl %ebx, 8(%eax,%ecx) +; CHECK-NEXT: movl %edx, 4(%eax,%ecx) +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: retl define void @test_extractelement_legalization_storereuse(<4 x i32> %a, i32* nocapture %x, i32* nocapture readonly %y, i32 %i) #0 { -; CHECK-LABEL: _test_extractelement_legalization_storereuse: ## @test_extractelement_legalization_storereuse -; CHECK: ## BB#0: ## %entry -; CHECK-NEXT: pushl %ebx -; CHECK-NEXT: pushl %edi -; CHECK-NEXT: pushl %esi -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: paddd (%ecx), %xmm0 -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: movdqa %xmm0, (%ecx) -; CHECK-NEXT: movl (%ecx), %esi -; CHECK-NEXT: movl 4(%ecx), %edi -; CHECK-NEXT: shll $4, %edx -; CHECK-NEXT: movl 8(%ecx), %ebx -; CHECK-NEXT: movl 12(%ecx), %ecx -; CHECK-NEXT: movl %esi, 12(%eax,%edx) -; CHECK-NEXT: movl %edi, (%eax,%edx) -; CHECK-NEXT: movl %ebx, 8(%eax,%edx) -; CHECK-NEXT: movl %ecx, 4(%eax,%edx) -; CHECK-NEXT: popl %esi -; CHECK-NEXT: popl %edi -; CHECK-NEXT: popl %ebx -; CHECK-NEXT: retl -; CHECK-NEXT: ## -- End function entry: %0 = bitcast i32* %y to <4 x i32>* %1 = load <4 x i32>, <4 x i32>* %0, align 16 diff --git 
a/test/CodeGen/X86/fp128-i128.ll b/test/CodeGen/X86/fp128-i128.ll index 98082ec611d..6c6bc8bdc1d 100644 --- a/test/CodeGen/X86/fp128-i128.ll +++ b/test/CodeGen/X86/fp128-i128.ll @@ -50,8 +50,8 @@ define void @TestUnionLD1(fp128 %s, i64 %n) #0 { ; CHECK-NEXT: andq %rdi, %rcx ; CHECK-NEXT: movabsq $-281474976710656, %rdx # imm = 0xFFFF000000000000 ; CHECK-NEXT: andq -{{[0-9]+}}(%rsp), %rdx -; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: orq %rcx, %rdx +; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 ; CHECK-NEXT: jmp foo # TAILCALL diff --git a/test/CodeGen/X86/gather-addresses.ll b/test/CodeGen/X86/gather-addresses.ll index 3383b6e2f4c..c3109673468 100644 --- a/test/CodeGen/X86/gather-addresses.ll +++ b/test/CodeGen/X86/gather-addresses.ll @@ -16,10 +16,11 @@ ; LIN: sarq $32, %r[[REG2]] ; LIN: movslq %e[[REG4]], %r[[REG3:.+]] ; LIN: sarq $32, %r[[REG4]] -; LIN: movsd (%rdi,%rsi,8), %xmm1 -; LIN: movhpd (%rdi,%rax,8), %xmm1 -; LIN: movdqa (%rsi), %xmm0 -; LIN: movq %rdi, %xmm1 +; LIN: movsd (%rdi,%r[[REG1]],8), %xmm0 +; LIN: movhpd (%rdi,%r[[REG2]],8), %xmm0 +; LIN: movsd (%rdi,%r[[REG3]],8), %xmm1 +; LIN: movhpd (%rdi,%r[[REG4]],8), %xmm1 + ; WIN: movdqa (%rdx), %xmm0 ; WIN: pand (%r8), %xmm0 ; WIN: pextrq $1, %xmm0, %r[[REG4:.+]] @@ -28,10 +29,10 @@ ; WIN: sarq $32, %r[[REG2]] ; WIN: movslq %e[[REG4]], %r[[REG3:.+]] ; WIN: sarq $32, %r[[REG4]] -; WIN: movsd (%rcx,%r9,8), %xmm1 -; WIN: movhpd (%rcx,%rax,8), %xmm1 -; WIN: movdqa (%rdx), %xmm0 -; WIN: movq %rdx, %xmm1 +; WIN: movsd (%rcx,%r[[REG1]],8), %xmm0 +; WIN: movhpd (%rcx,%r[[REG2]],8), %xmm0 +; WIN: movsd (%rcx,%r[[REG3]],8), %xmm1 +; WIN: movhpd (%rcx,%r[[REG4]],8), %xmm1 define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind { %a = load <4 x i32>, <4 x i32>* %i diff --git a/test/CodeGen/X86/half.ll b/test/CodeGen/X86/half.ll index f72dfa1eef5..4c8003f0c51 100644 --- a/test/CodeGen/X86/half.ll 
+++ b/test/CodeGen/X86/half.ll @@ -1,834 +1,266 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c -fixup-byte-word-insts=1 \ -; RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWON,NOF16-BWINSTS -; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c -fixup-byte-word-insts=0 \ -; RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWOFF,NOF16-NOBWINSTS -; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c -fixup-byte-word-insts=1 \ -; RUN: | FileCheck %s -check-prefixes=CHECK,BWON,CHECK-F16C -; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -fixup-byte-word-insts=0 \ +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c -asm-verbose=false -fixup-byte-word-insts=1 \ +; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LIBCALL -check-prefix=BWON +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=-f16c -asm-verbose=false -fixup-byte-word-insts=0 \ +; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LIBCALL -check-prefix=BWOFF +; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -mattr=+f16c -asm-verbose=false -fixup-byte-word-insts=1 \ +; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-F16C -check-prefix=BWON +; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -asm-verbose=false -fixup-byte-word-insts=0 \ ; RUN: | FileCheck %s -check-prefix=CHECK-I686 -define void @test_load_store(half* %in, half* %out) #0 { -; BWON-LABEL: test_load_store: -; BWON: # BB#0: -; BWON-NEXT: movzwl (%rdi), %eax -; BWON-NEXT: movw %ax, (%rsi) -; BWON-NEXT: retq -; -; BWOFF-LABEL: test_load_store: -; BWOFF: # BB#0: -; BWOFF-NEXT: movw (%rdi), %ax -; BWOFF-NEXT: movw %ax, (%rsi) -; BWOFF-NEXT: retq -; -; CHECK-I686-LABEL: test_load_store: -; 
CHECK-I686: # BB#0: -; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-I686-NEXT: movw (%ecx), %cx -; CHECK-I686-NEXT: movw %cx, (%eax) -; CHECK-I686-NEXT: retl +define void @test_load_store(half* %in, half* %out) { +; CHECK-LABEL: test_load_store: +; BWON: movzwl (%rdi), %eax +; BWOFF: movw (%rdi), %ax +; CHECK: movw %ax, (%rsi) %val = load half, half* %in store half %val, half* %out ret void } -define i16 @test_bitcast_from_half(half* %addr) #0 { -; BWON-LABEL: test_bitcast_from_half: -; BWON: # BB#0: -; BWON-NEXT: movzwl (%rdi), %eax -; BWON-NEXT: retq -; -; BWOFF-LABEL: test_bitcast_from_half: -; BWOFF: # BB#0: -; BWOFF-NEXT: movw (%rdi), %ax -; BWOFF-NEXT: retq -; -; CHECK-I686-LABEL: test_bitcast_from_half: -; CHECK-I686: # BB#0: -; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-I686-NEXT: movw (%eax), %ax -; CHECK-I686-NEXT: retl +define i16 @test_bitcast_from_half(half* %addr) { +; CHECK-LABEL: test_bitcast_from_half: +; BWON: movzwl (%rdi), %eax +; BWOFF: movw (%rdi), %ax %val = load half, half* %addr %val_int = bitcast half %val to i16 ret i16 %val_int } -define void @test_bitcast_to_half(half* %addr, i16 %in) #0 { +define void @test_bitcast_to_half(half* %addr, i16 %in) { ; CHECK-LABEL: test_bitcast_to_half: -; CHECK: # BB#0: -; CHECK-NEXT: movw %si, (%rdi) -; CHECK-NEXT: retq -; -; CHECK-I686-LABEL: test_bitcast_to_half: -; CHECK-I686: # BB#0: -; CHECK-I686-NEXT: movw {{[0-9]+}}(%esp), %ax -; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-I686-NEXT: movw %ax, (%ecx) -; CHECK-I686-NEXT: retl +; CHECK: movw %si, (%rdi) %val_fp = bitcast i16 %in to half store half %val_fp, half* %addr ret void } -define float @test_extend32(half* %addr) #0 { -; CHECK-LIBCALL-LABEL: test_extend32: -; CHECK-LIBCALL: # BB#0: -; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi -; CHECK-LIBCALL-NEXT: jmp __gnu_h2f_ieee # TAILCALL -; -; CHECK-F16C-LABEL: test_extend32: -; CHECK-F16C: # BB#0: -; CHECK-F16C-NEXT: movswl 
(%rdi), %eax -; CHECK-F16C-NEXT: vmovd %eax, %xmm0 -; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 -; CHECK-F16C-NEXT: retq -; -; CHECK-I686-LABEL: test_extend32: -; CHECK-I686: # BB#0: -; CHECK-I686-NEXT: subl $12, %esp -; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-I686-NEXT: movzwl (%eax), %eax -; CHECK-I686-NEXT: movl %eax, (%esp) -; CHECK-I686-NEXT: calll __gnu_h2f_ieee -; CHECK-I686-NEXT: addl $12, %esp -; CHECK-I686-NEXT: retl +define float @test_extend32(half* %addr) { +; CHECK-LABEL: test_extend32: + +; CHECK-LIBCALL: jmp __gnu_h2f_ieee +; CHECK-F16C: vcvtph2ps %val16 = load half, half* %addr %val32 = fpext half %val16 to float ret float %val32 } -define double @test_extend64(half* %addr) #0 { -; CHECK-LIBCALL-LABEL: test_extend64: -; CHECK-LIBCALL: # BB#0: -; CHECK-LIBCALL-NEXT: pushq %rax -; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi -; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee -; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0 -; CHECK-LIBCALL-NEXT: popq %rax -; CHECK-LIBCALL-NEXT: retq -; -; CHECK-F16C-LABEL: test_extend64: -; CHECK-F16C: # BB#0: -; CHECK-F16C-NEXT: movswl (%rdi), %eax -; CHECK-F16C-NEXT: vmovd %eax, %xmm0 -; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 -; CHECK-F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; CHECK-F16C-NEXT: retq -; -; CHECK-I686-LABEL: test_extend64: -; CHECK-I686: # BB#0: -; CHECK-I686-NEXT: subl $12, %esp -; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-I686-NEXT: movzwl (%eax), %eax -; CHECK-I686-NEXT: movl %eax, (%esp) -; CHECK-I686-NEXT: calll __gnu_h2f_ieee -; CHECK-I686-NEXT: addl $12, %esp -; CHECK-I686-NEXT: retl +define double @test_extend64(half* %addr) { +; CHECK-LABEL: test_extend64: + +; CHECK-LIBCALL: callq __gnu_h2f_ieee +; CHECK-LIBCALL: cvtss2sd +; CHECK-F16C: vcvtph2ps +; CHECK-F16C: vcvtss2sd %val16 = load half, half* %addr %val32 = fpext half %val16 to double ret double %val32 } -define void @test_trunc32(float %in, half* %addr) #0 { -; CHECK-LIBCALL-LABEL: test_trunc32: -; CHECK-LIBCALL: # BB#0: -; 
CHECK-LIBCALL-NEXT: pushq %rbx -; CHECK-LIBCALL-NEXT: movq %rdi, %rbx -; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee -; CHECK-LIBCALL-NEXT: movw %ax, (%rbx) -; CHECK-LIBCALL-NEXT: popq %rbx -; CHECK-LIBCALL-NEXT: retq -; -; CHECK-F16C-LABEL: test_trunc32: -; CHECK-F16C: # BB#0: -; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; CHECK-F16C-NEXT: vmovd %xmm0, %eax -; CHECK-F16C-NEXT: movw %ax, (%rdi) -; CHECK-F16C-NEXT: retq -; -; CHECK-I686-LABEL: test_trunc32: -; CHECK-I686: # BB#0: -; CHECK-I686-NEXT: pushl %esi -; CHECK-I686-NEXT: subl $8, %esp -; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-I686-NEXT: movss %xmm0, (%esp) -; CHECK-I686-NEXT: calll __gnu_f2h_ieee -; CHECK-I686-NEXT: movw %ax, (%esi) -; CHECK-I686-NEXT: addl $8, %esp -; CHECK-I686-NEXT: popl %esi -; CHECK-I686-NEXT: retl +define void @test_trunc32(float %in, half* %addr) { +; CHECK-LABEL: test_trunc32: + +; CHECK-LIBCALL: callq __gnu_f2h_ieee +; CHECK-F16C: vcvtps2ph %val16 = fptrunc float %in to half store half %val16, half* %addr ret void } -define void @test_trunc64(double %in, half* %addr) #0 { +define void @test_trunc64(double %in, half* %addr) { ; CHECK-LABEL: test_trunc64: -; CHECK: # BB#0: -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: movq %rdi, %rbx -; CHECK-NEXT: callq __truncdfhf2 -; CHECK-NEXT: movw %ax, (%rbx) -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: retq -; -; CHECK-I686-LABEL: test_trunc64: -; CHECK-I686: # BB#0: -; CHECK-I686-NEXT: pushl %esi -; CHECK-I686-NEXT: subl $8, %esp -; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-I686-NEXT: movsd %xmm0, (%esp) -; CHECK-I686-NEXT: calll __truncdfhf2 -; CHECK-I686-NEXT: movw %ax, (%esi) -; CHECK-I686-NEXT: addl $8, %esp -; CHECK-I686-NEXT: popl %esi -; CHECK-I686-NEXT: retl + +; CHECK-LIBCALL: callq __truncdfhf2 +; CHECK-F16C: callq __truncdfhf2 %val16 = fptrunc double %in to half store half %val16, half* %addr ret 
void } define i64 @test_fptosi_i64(half* %p) #0 { -; CHECK-LIBCALL-LABEL: test_fptosi_i64: -; CHECK-LIBCALL: # BB#0: -; CHECK-LIBCALL-NEXT: pushq %rax -; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi -; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee -; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax -; CHECK-LIBCALL-NEXT: popq %rcx -; CHECK-LIBCALL-NEXT: retq -; -; CHECK-F16C-LABEL: test_fptosi_i64: -; CHECK-F16C: # BB#0: -; CHECK-F16C-NEXT: movswl (%rdi), %eax -; CHECK-F16C-NEXT: vmovd %eax, %xmm0 -; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 -; CHECK-F16C-NEXT: vcvttss2si %xmm0, %rax -; CHECK-F16C-NEXT: retq -; -; CHECK-I686-LABEL: test_fptosi_i64: -; CHECK-I686: # BB#0: -; CHECK-I686-NEXT: subl $12, %esp -; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-I686-NEXT: movzwl (%eax), %eax -; CHECK-I686-NEXT: movl %eax, (%esp) -; CHECK-I686-NEXT: calll __gnu_h2f_ieee -; CHECK-I686-NEXT: fstps (%esp) -; CHECK-I686-NEXT: calll __fixsfdi -; CHECK-I686-NEXT: addl $12, %esp -; CHECK-I686-NEXT: retl +; CHECK-LABEL: test_fptosi_i64: + +; CHECK-LIBCALL-NEXT: pushq %rax +; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi +; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee +; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax +; CHECK-LIBCALL-NEXT: popq %rcx +; CHECK-LIBCALL-NEXT: retq + +; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]] +; CHECK-F16C-NEXT: vmovd [[REG0]], [[REG1:%[a-z0-9]+]] +; CHECK-F16C-NEXT: vcvtph2ps [[REG1]], [[REG2:%[a-z0-9]+]] +; CHECK-F16C-NEXT: vcvttss2si [[REG2]], %rax +; CHECK-F16C-NEXT: retq %a = load half, half* %p, align 2 %r = fptosi half %a to i64 ret i64 %r } define void @test_sitofp_i64(i64 %a, half* %p) #0 { -; CHECK-LIBCALL-LABEL: test_sitofp_i64: -; CHECK-LIBCALL: # BB#0: -; CHECK-LIBCALL-NEXT: pushq %rbx -; CHECK-LIBCALL-NEXT: movq %rsi, %rbx -; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, %xmm0 -; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee -; CHECK-LIBCALL-NEXT: movw %ax, (%rbx) -; CHECK-LIBCALL-NEXT: popq %rbx -; CHECK-LIBCALL-NEXT: retq -; -; CHECK-F16C-LABEL: test_sitofp_i64: -; 
CHECK-F16C: # BB#0: -; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 -; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; CHECK-F16C-NEXT: vmovd %xmm0, %eax -; CHECK-F16C-NEXT: movw %ax, (%rsi) -; CHECK-F16C-NEXT: retq -; -; CHECK-I686-LABEL: test_sitofp_i64: -; CHECK-I686: # BB#0: -; CHECK-I686-NEXT: pushl %esi -; CHECK-I686-NEXT: subl $24, %esp -; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-I686-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) -; CHECK-I686-NEXT: fildll {{[0-9]+}}(%esp) -; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) -; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-I686-NEXT: movss %xmm0, (%esp) -; CHECK-I686-NEXT: calll __gnu_f2h_ieee -; CHECK-I686-NEXT: movw %ax, (%esi) -; CHECK-I686-NEXT: addl $24, %esp -; CHECK-I686-NEXT: popl %esi -; CHECK-I686-NEXT: retl +; CHECK-LABEL: test_sitofp_i64: + +; CHECK-LIBCALL-NEXT: pushq [[ADDR:%[a-z]+]] +; CHECK-LIBCALL-NEXT: movq %rsi, [[ADDR]] +; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, %xmm0 +; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee +; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]]) +; CHECK_LIBCALL-NEXT: popq [[ADDR]] +; CHECK_LIBCALL-NEXT: retq + +; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG0:%[a-z0-9]+]], [[REG0]] +; CHECK-F16C-NEXT: vcvtps2ph $4, [[REG0]], [[REG0]] +; CHECK-F16C-NEXT: vmovd [[REG0]], %eax +; CHECK-F16C-NEXT: movw %ax, (%rsi) +; CHECK-F16C-NEXT: retq %r = sitofp i64 %a to half store half %r, half* %p ret void } define i64 @test_fptoui_i64(half* %p) #0 { -; CHECK-LIBCALL-LABEL: test_fptoui_i64: -; CHECK-LIBCALL: # BB#0: -; CHECK-LIBCALL-NEXT: pushq %rax -; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi -; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee -; CHECK-LIBCALL-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-LIBCALL-NEXT: movaps %xmm0, %xmm2 -; CHECK-LIBCALL-NEXT: subss %xmm1, %xmm2 -; CHECK-LIBCALL-NEXT: cvttss2si %xmm2, %rcx -; CHECK-LIBCALL-NEXT: movabsq $-9223372036854775808, %rdx # imm = 0x8000000000000000 -; 
CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax -; CHECK-LIBCALL-NEXT: xorq %rcx, %rdx -; CHECK-LIBCALL-NEXT: ucomiss %xmm1, %xmm0 -; CHECK-LIBCALL-NEXT: cmovaeq %rdx, %rax -; CHECK-LIBCALL-NEXT: popq %rcx -; CHECK-LIBCALL-NEXT: retq -; -; CHECK-F16C-LABEL: test_fptoui_i64: -; CHECK-F16C: # BB#0: -; CHECK-F16C-NEXT: movswl (%rdi), %eax -; CHECK-F16C-NEXT: vmovd %eax, %xmm0 -; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 -; CHECK-F16C-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-F16C-NEXT: vsubss %xmm1, %xmm0, %xmm2 -; CHECK-F16C-NEXT: vcvttss2si %xmm2, %rcx -; CHECK-F16C-NEXT: movabsq $-9223372036854775808, %rdx # imm = 0x8000000000000000 -; CHECK-F16C-NEXT: vcvttss2si %xmm0, %rax -; CHECK-F16C-NEXT: xorq %rcx, %rdx -; CHECK-F16C-NEXT: vucomiss %xmm1, %xmm0 -; CHECK-F16C-NEXT: cmovaeq %rdx, %rax -; CHECK-F16C-NEXT: retq -; -; CHECK-I686-LABEL: test_fptoui_i64: -; CHECK-I686: # BB#0: -; CHECK-I686-NEXT: subl $12, %esp -; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-I686-NEXT: movzwl (%eax), %eax -; CHECK-I686-NEXT: movl %eax, (%esp) -; CHECK-I686-NEXT: calll __gnu_h2f_ieee -; CHECK-I686-NEXT: fstps (%esp) -; CHECK-I686-NEXT: calll __fixunssfdi -; CHECK-I686-NEXT: addl $12, %esp -; CHECK-I686-NEXT: retl +; CHECK-LABEL: test_fptoui_i64: + +; FP_TO_UINT is expanded using FP_TO_SINT +; CHECK-LIBCALL-NEXT: pushq %rax +; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi +; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee +; CHECK-LIBCALL-NEXT: movss {{.[A-Z_0-9]+}}(%rip), [[REG1:%[a-z0-9]+]] +; CHECK-LIBCALL-NEXT: movaps %xmm0, [[REG2:%[a-z0-9]+]] +; CHECK-LIBCALL-NEXT: subss [[REG1]], [[REG2]] +; CHECK-LIBCALL-NEXT: cvttss2si [[REG2]], [[REG3:%[a-z0-9]+]] +; CHECK-LIBCALL-NEXT: movabsq $-9223372036854775808, [[REG4:%[a-z0-9]+]] +; CHECK-LIBCALL-NEXT: xorq [[REG3]], [[REG4]] +; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, [[REG5:%[a-z0-9]+]] +; CHECK-LIBCALL-NEXT: ucomiss [[REG1]], %xmm0 +; CHECK-LIBCALL-NEXT: cmovaeq [[REG4]], [[REG5]] +; CHECK-LIBCALL-NEXT: popq %rcx +; 
CHECK-LIBCALL-NEXT: retq + +; CHECK-F16C-NEXT: movswl (%rdi), [[REG0:%[a-z0-9]+]] +; CHECK-F16C-NEXT: vmovd [[REG0]], [[REG1:%[a-z0-9]+]] +; CHECK-F16C-NEXT: vcvtph2ps [[REG1]], [[REG2:%[a-z0-9]+]] +; CHECK-F16C-NEXT: vmovss {{.[A-Z_0-9]+}}(%rip), [[REG3:%[a-z0-9]+]] +; CHECK-F16C-NEXT: vsubss [[REG3]], [[REG2]], [[REG4:%[a-z0-9]+]] +; CHECK-F16C-NEXT: vcvttss2si [[REG4]], [[REG5:%[a-z0-9]+]] +; CHECK-F16C-NEXT: movabsq $-9223372036854775808, [[REG6:%[a-z0-9]+]] +; CHECK-F16C-NEXT: xorq [[REG5]], [[REG6:%[a-z0-9]+]] +; CHECK-F16C-NEXT: vcvttss2si [[REG2]], [[REG7:%[a-z0-9]+]] +; CHECK-F16C-NEXT: vucomiss [[REG3]], [[REG2]] +; CHECK-F16C-NEXT: cmovaeq [[REG6]], %rax +; CHECK-F16C-NEXT: retq %a = load half, half* %p, align 2 %r = fptoui half %a to i64 ret i64 %r } define void @test_uitofp_i64(i64 %a, half* %p) #0 { -; CHECK-LIBCALL-LABEL: test_uitofp_i64: -; CHECK-LIBCALL: # BB#0: -; CHECK-LIBCALL-NEXT: pushq %rbx -; CHECK-LIBCALL-NEXT: movq %rsi, %rbx -; CHECK-LIBCALL-NEXT: testq %rdi, %rdi -; CHECK-LIBCALL-NEXT: js .LBB10_1 -; CHECK-LIBCALL-NEXT: # BB#2: -; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, %xmm0 -; CHECK-LIBCALL-NEXT: jmp .LBB10_3 -; CHECK-LIBCALL-NEXT: .LBB10_1: -; CHECK-LIBCALL-NEXT: movq %rdi, %rax -; CHECK-LIBCALL-NEXT: shrq %rax -; CHECK-LIBCALL-NEXT: andl $1, %edi -; CHECK-LIBCALL-NEXT: orq %rax, %rdi -; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, %xmm0 -; CHECK-LIBCALL-NEXT: addss %xmm0, %xmm0 -; CHECK-LIBCALL-NEXT: .LBB10_3: -; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee -; CHECK-LIBCALL-NEXT: movw %ax, (%rbx) -; CHECK-LIBCALL-NEXT: popq %rbx -; CHECK-LIBCALL-NEXT: retq -; -; CHECK-F16C-LABEL: test_uitofp_i64: -; CHECK-F16C: # BB#0: -; CHECK-F16C-NEXT: testq %rdi, %rdi -; CHECK-F16C-NEXT: js .LBB10_1 -; CHECK-F16C-NEXT: # BB#2: -; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 -; CHECK-F16C-NEXT: jmp .LBB10_3 -; CHECK-F16C-NEXT: .LBB10_1: -; CHECK-F16C-NEXT: movq %rdi, %rax -; CHECK-F16C-NEXT: shrq %rax -; CHECK-F16C-NEXT: andl $1, %edi -; CHECK-F16C-NEXT: orq 
%rax, %rdi -; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 -; CHECK-F16C-NEXT: vaddss %xmm0, %xmm0, %xmm0 -; CHECK-F16C-NEXT: .LBB10_3: -; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; CHECK-F16C-NEXT: vmovd %xmm0, %eax -; CHECK-F16C-NEXT: movw %ax, (%rsi) -; CHECK-F16C-NEXT: retq -; -; CHECK-I686-LABEL: test_uitofp_i64: -; CHECK-I686: # BB#0: -; CHECK-I686-NEXT: pushl %esi -; CHECK-I686-NEXT: subl $24, %esp -; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-I686-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) -; CHECK-I686-NEXT: xorl %eax, %eax -; CHECK-I686-NEXT: cmpl $0, {{[0-9]+}}(%esp) -; CHECK-I686-NEXT: setns %al -; CHECK-I686-NEXT: fildll {{[0-9]+}}(%esp) -; CHECK-I686-NEXT: fadds {{\.LCPI.*}}(,%eax,4) -; CHECK-I686-NEXT: fstps (%esp) -; CHECK-I686-NEXT: calll __gnu_f2h_ieee -; CHECK-I686-NEXT: movw %ax, (%esi) -; CHECK-I686-NEXT: addl $24, %esp -; CHECK-I686-NEXT: popl %esi -; CHECK-I686-NEXT: retl +; CHECK-LABEL: test_uitofp_i64: +; CHECK-LIBCALL-NEXT: pushq [[ADDR:%[a-z0-9]+]] +; CHECK-LIBCALL-NEXT: movq %rsi, [[ADDR]] +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: js [[LABEL1:.LBB[0-9_]+]] + +; simple conversion to float if non-negative +; CHECK-LIBCALL-NEXT: cvtsi2ssq %rdi, [[REG1:%[a-z0-9]+]] +; CHECK-F16C-NEXT: vcvtsi2ssq %rdi, [[REG1:%[a-z0-9]+]], [[REG1]] +; CHECK-NEXT: jmp [[LABEL2:.LBB[0-9_]+]] + +; convert using shift+or if negative +; CHECK-NEXT: [[LABEL1]]: +; CHECK-NEXT: movq %rdi, %rax +; CHECK-NEXT: shrq %rax +; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: orq %rax, [[REG2:%[a-z0-9]+]] +; CHECK-LIBCALL-NEXT: cvtsi2ssq [[REG2]], [[REG3:%[a-z0-9]+]] +; CHECK-LIBCALL-NEXT: addss [[REG3]], [[REG1]] +; CHECK-F16C-NEXT: vcvtsi2ssq [[REG2]], [[REG3:%[a-z0-9]+]], [[REG3]] +; CHECK-F16C-NEXT: vaddss [[REG3]], [[REG3]], [[REG1:[%a-z0-9]+]] + +; convert float to half +; CHECK-NEXT: [[LABEL2]]: +; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee +; CHECK-LIBCALL-NEXT: movw %ax, ([[ADDR]]) +; CHECK-LIBCALL-NEXT: 
popq [[ADDR]] +; CHECK-F16C-NEXT: vcvtps2ph $4, [[REG1]], [[REG4:%[a-z0-9]+]] +; CHECK-F16C-NEXT: vmovd [[REG4]], %eax +; CHECK-F16C-NEXT: movw %ax, (%rsi) +; CHECK-NEXT: retq + %r = uitofp i64 %a to half store half %r, half* %p ret void } define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 { -; CHECK-LIBCALL-LABEL: test_extend32_vec4: -; CHECK-LIBCALL: # BB#0: -; CHECK-LIBCALL-NEXT: pushq %rbx -; CHECK-LIBCALL-NEXT: subq $48, %rsp -; CHECK-LIBCALL-NEXT: movq %rdi, %rbx -; CHECK-LIBCALL-NEXT: movzwl 6(%rbx), %edi -; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee -; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill -; CHECK-LIBCALL-NEXT: movzwl 4(%rbx), %edi -; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee -; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill -; CHECK-LIBCALL-NEXT: movzwl (%rbx), %edi -; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee -; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; CHECK-LIBCALL-NEXT: movzwl 2(%rbx), %edi -; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee -; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload -; CHECK-LIBCALL-NEXT: insertps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[2,3] -; CHECK-LIBCALL-NEXT: insertps $32, {{[0-9]+}}(%rsp), %xmm1 # 16-byte Folded Reload -; CHECK-LIBCALL-NEXT: # xmm1 = xmm1[0,1],mem[0],xmm1[3] -; CHECK-LIBCALL-NEXT: insertps $48, {{[0-9]+}}(%rsp), %xmm1 # 16-byte Folded Reload -; CHECK-LIBCALL-NEXT: # xmm1 = xmm1[0,1,2],mem[0] -; CHECK-LIBCALL-NEXT: movaps %xmm1, %xmm0 -; CHECK-LIBCALL-NEXT: addq $48, %rsp -; CHECK-LIBCALL-NEXT: popq %rbx -; CHECK-LIBCALL-NEXT: retq -; -; CHECK-F16C-LABEL: test_extend32_vec4: -; CHECK-F16C: # BB#0: -; CHECK-F16C-NEXT: movswl 6(%rdi), %eax -; CHECK-F16C-NEXT: vmovd %eax, %xmm0 -; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 -; CHECK-F16C-NEXT: movswl 4(%rdi), %eax -; CHECK-F16C-NEXT: vmovd %eax, %xmm1 -; CHECK-F16C-NEXT: vcvtph2ps %xmm1, %xmm1 -; CHECK-F16C-NEXT: movswl (%rdi), %eax -; CHECK-F16C-NEXT: vmovd %eax, %xmm2 -; CHECK-F16C-NEXT: 
vcvtph2ps %xmm2, %xmm2 -; CHECK-F16C-NEXT: movswl 2(%rdi), %eax -; CHECK-F16C-NEXT: vmovd %eax, %xmm3 -; CHECK-F16C-NEXT: vcvtph2ps %xmm3, %xmm3 -; CHECK-F16C-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3] -; CHECK-F16C-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] -; CHECK-F16C-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] -; CHECK-F16C-NEXT: retq -; -; CHECK-I686-LABEL: test_extend32_vec4: -; CHECK-I686: # BB#0: -; CHECK-I686-NEXT: pushl %esi -; CHECK-I686-NEXT: subl $56, %esp -; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-I686-NEXT: movzwl 2(%esi), %eax -; CHECK-I686-NEXT: movl %eax, (%esp) -; CHECK-I686-NEXT: calll __gnu_h2f_ieee -; CHECK-I686-NEXT: fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill -; CHECK-I686-NEXT: movzwl 4(%esi), %eax -; CHECK-I686-NEXT: movl %eax, (%esp) -; CHECK-I686-NEXT: calll __gnu_h2f_ieee -; CHECK-I686-NEXT: fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill -; CHECK-I686-NEXT: movzwl 6(%esi), %eax -; CHECK-I686-NEXT: movl %eax, (%esp) -; CHECK-I686-NEXT: calll __gnu_h2f_ieee -; CHECK-I686-NEXT: movzwl (%esi), %eax -; CHECK-I686-NEXT: movl %eax, (%esp) -; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) -; CHECK-I686-NEXT: fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload -; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) -; CHECK-I686-NEXT: fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload -; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) -; CHECK-I686-NEXT: calll __gnu_h2f_ieee -; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) -; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-I686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; CHECK-I686-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; CHECK-I686-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; CHECK-I686-NEXT: addl $56, %esp -; 
CHECK-I686-NEXT: popl %esi -; CHECK-I686-NEXT: retl +; CHECK-LABEL: test_extend32_vec4: + +; CHECK-LIBCALL: callq __gnu_h2f_ieee +; CHECK-LIBCALL: callq __gnu_h2f_ieee +; CHECK-LIBCALL: callq __gnu_h2f_ieee +; CHECK-LIBCALL: callq __gnu_h2f_ieee +; CHECK-F16C: vcvtph2ps +; CHECK-F16C: vcvtph2ps +; CHECK-F16C: vcvtph2ps +; CHECK-F16C: vcvtph2ps %a = load <4 x half>, <4 x half>* %p, align 8 %b = fpext <4 x half> %a to <4 x float> ret <4 x float> %b } define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 { -; CHECK-LIBCALL-LABEL: test_extend64_vec4: -; CHECK-LIBCALL: # BB#0: -; CHECK-LIBCALL-NEXT: pushq %rbx -; CHECK-LIBCALL-NEXT: subq $16, %rsp -; CHECK-LIBCALL-NEXT: movq %rdi, %rbx -; CHECK-LIBCALL-NEXT: movzwl 4(%rbx), %edi -; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee -; CHECK-LIBCALL-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill -; CHECK-LIBCALL-NEXT: movzwl 6(%rbx), %edi -; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee -; CHECK-LIBCALL-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill -; CHECK-LIBCALL-NEXT: movzwl (%rbx), %edi -; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee -; CHECK-LIBCALL-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill -; CHECK-LIBCALL-NEXT: movzwl 2(%rbx), %edi -; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee -; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm1 -; CHECK-LIBCALL-NEXT: movss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Reload -; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero -; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0 -; CHECK-LIBCALL-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; CHECK-LIBCALL-NEXT: movss {{[0-9]+}}(%rsp), %xmm1 # 4-byte Reload -; CHECK-LIBCALL-NEXT: # xmm1 = mem[0],zero,zero,zero -; CHECK-LIBCALL-NEXT: cvtss2sd %xmm1, %xmm2 -; CHECK-LIBCALL-NEXT: movss {{[0-9]+}}(%rsp), %xmm1 # 4-byte Reload -; CHECK-LIBCALL-NEXT: # xmm1 = mem[0],zero,zero,zero -; CHECK-LIBCALL-NEXT: cvtss2sd %xmm1, %xmm1 -; CHECK-LIBCALL-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] -; CHECK-LIBCALL-NEXT: addq $16, %rsp -; CHECK-LIBCALL-NEXT: popq %rbx 
-; CHECK-LIBCALL-NEXT: retq -; -; CHECK-F16C-LABEL: test_extend64_vec4: -; CHECK-F16C: # BB#0: -; CHECK-F16C-NEXT: movswl (%rdi), %eax -; CHECK-F16C-NEXT: vmovd %eax, %xmm0 -; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 -; CHECK-F16C-NEXT: movswl 2(%rdi), %eax -; CHECK-F16C-NEXT: vmovd %eax, %xmm1 -; CHECK-F16C-NEXT: vcvtph2ps %xmm1, %xmm1 -; CHECK-F16C-NEXT: movswl 4(%rdi), %eax -; CHECK-F16C-NEXT: vmovd %eax, %xmm2 -; CHECK-F16C-NEXT: vcvtph2ps %xmm2, %xmm2 -; CHECK-F16C-NEXT: movswl 6(%rdi), %eax -; CHECK-F16C-NEXT: vmovd %eax, %xmm3 -; CHECK-F16C-NEXT: vcvtph2ps %xmm3, %xmm3 -; CHECK-F16C-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 -; CHECK-F16C-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 -; CHECK-F16C-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] -; CHECK-F16C-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 -; CHECK-F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 -; CHECK-F16C-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; CHECK-F16C-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 -; CHECK-F16C-NEXT: retq -; -; CHECK-I686-LABEL: test_extend64_vec4: -; CHECK-I686: # BB#0: -; CHECK-I686-NEXT: pushl %esi -; CHECK-I686-NEXT: subl $88, %esp -; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi -; CHECK-I686-NEXT: movzwl 6(%esi), %eax -; CHECK-I686-NEXT: movl %eax, (%esp) -; CHECK-I686-NEXT: calll __gnu_h2f_ieee -; CHECK-I686-NEXT: fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill -; CHECK-I686-NEXT: movzwl 4(%esi), %eax -; CHECK-I686-NEXT: movl %eax, (%esp) -; CHECK-I686-NEXT: calll __gnu_h2f_ieee -; CHECK-I686-NEXT: fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill -; CHECK-I686-NEXT: movzwl 2(%esi), %eax -; CHECK-I686-NEXT: movl %eax, (%esp) -; CHECK-I686-NEXT: calll __gnu_h2f_ieee -; CHECK-I686-NEXT: fstpt {{[0-9]+}}(%esp) # 10-byte Folded Spill -; CHECK-I686-NEXT: movzwl (%esi), %eax -; CHECK-I686-NEXT: movl %eax, (%esp) -; CHECK-I686-NEXT: calll __gnu_h2f_ieee -; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp) -; CHECK-I686-NEXT: fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload -; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp) -; 
CHECK-I686-NEXT: fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload -; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp) -; CHECK-I686-NEXT: fldt {{[0-9]+}}(%esp) # 10-byte Folded Reload -; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp) -; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-I686-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] -; CHECK-I686-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-I686-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] -; CHECK-I686-NEXT: addl $88, %esp -; CHECK-I686-NEXT: popl %esi -; CHECK-I686-NEXT: retl +; CHECK-LABEL: test_extend64_vec4 + +; CHECK-LIBCALL: callq __gnu_h2f_ieee +; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee +; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee +; CHECK-LIBCALL-DAG: callq __gnu_h2f_ieee +; CHECK-LIBCALL-DAG: cvtss2sd +; CHECK-LIBCALL-DAG: cvtss2sd +; CHECK-LIBCALL-DAG: cvtss2sd +; CHECK-LIBCALL: cvtss2sd +; CHECK-F16C: vcvtph2ps +; CHECK-F16C-DAG: vcvtph2ps +; CHECK-F16C-DAG: vcvtph2ps +; CHECK-F16C-DAG: vcvtph2ps +; CHECK-F16C-DAG: vcvtss2sd +; CHECK-F16C-DAG: vcvtss2sd +; CHECK-F16C-DAG: vcvtss2sd +; CHECK-F16C: vcvtss2sd %a = load <4 x half>, <4 x half>* %p, align 8 %b = fpext <4 x half> %a to <4 x double> ret <4 x double> %b } -define void @test_trunc32_vec4(<4 x float> %a, <4 x half>* %p) #0 { -; NOF16-BWINSTS-LABEL: test_trunc32_vec4: -; NOF16-BWINSTS: # BB#0: -; NOF16-BWINSTS-NEXT: pushq %rbp -; NOF16-BWINSTS-NEXT: pushq %r15 -; NOF16-BWINSTS-NEXT: pushq %r14 -; NOF16-BWINSTS-NEXT: pushq %rbx -; NOF16-BWINSTS-NEXT: subq $24, %rsp -; NOF16-BWINSTS-NEXT: movq %rdi, %rbx -; NOF16-BWINSTS-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; NOF16-BWINSTS-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] -; NOF16-BWINSTS-NEXT: callq __gnu_f2h_ieee -; NOF16-BWINSTS-NEXT: movl %eax, %r14d -; NOF16-BWINSTS-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload -; NOF16-BWINSTS-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] -; NOF16-BWINSTS-NEXT: callq __gnu_f2h_ieee -; NOF16-BWINSTS-NEXT: movl %eax, %r15d -; NOF16-BWINSTS-NEXT: movaps (%rsp), %xmm0 # 16-byte 
Reload -; NOF16-BWINSTS-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; NOF16-BWINSTS-NEXT: callq __gnu_f2h_ieee -; NOF16-BWINSTS-NEXT: movl %eax, %ebp -; NOF16-BWINSTS-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload -; NOF16-BWINSTS-NEXT: callq __gnu_f2h_ieee -; NOF16-BWINSTS-NEXT: movw %ax, (%rbx) -; NOF16-BWINSTS-NEXT: movw %bp, 6(%rbx) -; NOF16-BWINSTS-NEXT: movw %r15w, 4(%rbx) -; NOF16-BWINSTS-NEXT: movw %r14w, 2(%rbx) -; NOF16-BWINSTS-NEXT: addq $24, %rsp -; NOF16-BWINSTS-NEXT: popq %rbx -; NOF16-BWINSTS-NEXT: popq %r14 -; NOF16-BWINSTS-NEXT: popq %r15 -; NOF16-BWINSTS-NEXT: popq %rbp -; NOF16-BWINSTS-NEXT: retq -; -; BWOFF-LABEL: test_trunc32_vec4: -; BWOFF: # BB#0: -; BWOFF-NEXT: pushq %rbp -; BWOFF-NEXT: pushq %r15 -; BWOFF-NEXT: pushq %r14 -; BWOFF-NEXT: pushq %rbx -; BWOFF-NEXT: subq $24, %rsp -; BWOFF-NEXT: movq %rdi, %rbx -; BWOFF-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill -; BWOFF-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] -; BWOFF-NEXT: callq __gnu_f2h_ieee -; BWOFF-NEXT: movw %ax, %r14w -; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload -; BWOFF-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] -; BWOFF-NEXT: callq __gnu_f2h_ieee -; BWOFF-NEXT: movw %ax, %r15w -; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload -; BWOFF-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; BWOFF-NEXT: callq __gnu_f2h_ieee -; BWOFF-NEXT: movw %ax, %bp -; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload -; BWOFF-NEXT: callq __gnu_f2h_ieee -; BWOFF-NEXT: movw %ax, (%rbx) -; BWOFF-NEXT: movw %bp, 6(%rbx) -; BWOFF-NEXT: movw %r15w, 4(%rbx) -; BWOFF-NEXT: movw %r14w, 2(%rbx) -; BWOFF-NEXT: addq $24, %rsp -; BWOFF-NEXT: popq %rbx -; BWOFF-NEXT: popq %r14 -; BWOFF-NEXT: popq %r15 -; BWOFF-NEXT: popq %rbp -; BWOFF-NEXT: retq -; -; CHECK-F16C-LABEL: test_trunc32_vec4: -; CHECK-F16C: # BB#0: -; CHECK-F16C-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] -; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm1, %xmm1 -; CHECK-F16C-NEXT: vmovd %xmm1, %eax -; CHECK-F16C-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; 
CHECK-F16C-NEXT: vcvtps2ph $4, %xmm1, %xmm1 -; CHECK-F16C-NEXT: vmovd %xmm1, %ecx -; CHECK-F16C-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] -; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm1, %xmm1 -; CHECK-F16C-NEXT: vmovd %xmm1, %edx -; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; CHECK-F16C-NEXT: vmovd %xmm0, %esi -; CHECK-F16C-NEXT: movw %si, (%rdi) -; CHECK-F16C-NEXT: movw %dx, 6(%rdi) -; CHECK-F16C-NEXT: movw %cx, 4(%rdi) -; CHECK-F16C-NEXT: movw %ax, 2(%rdi) -; CHECK-F16C-NEXT: retq -; -; CHECK-I686-LABEL: test_trunc32_vec4: -; CHECK-I686: # BB#0: -; CHECK-I686-NEXT: pushl %ebp -; CHECK-I686-NEXT: pushl %ebx -; CHECK-I686-NEXT: pushl %edi -; CHECK-I686-NEXT: pushl %esi -; CHECK-I686-NEXT: subl $44, %esp -; CHECK-I686-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) # 16-byte Spill -; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; CHECK-I686-NEXT: movaps %xmm0, %xmm1 -; CHECK-I686-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] -; CHECK-I686-NEXT: movss %xmm1, (%esp) -; CHECK-I686-NEXT: calll __gnu_f2h_ieee -; CHECK-I686-NEXT: movw %ax, %si -; CHECK-I686-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 # 16-byte Reload -; CHECK-I686-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] -; CHECK-I686-NEXT: movss %xmm0, (%esp) -; CHECK-I686-NEXT: calll __gnu_f2h_ieee -; CHECK-I686-NEXT: movw %ax, %di -; CHECK-I686-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 # 16-byte Reload -; CHECK-I686-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] -; CHECK-I686-NEXT: movss %xmm0, (%esp) -; CHECK-I686-NEXT: calll __gnu_f2h_ieee -; CHECK-I686-NEXT: movw %ax, %bx -; CHECK-I686-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 # 16-byte Reload -; CHECK-I686-NEXT: movss %xmm0, (%esp) -; CHECK-I686-NEXT: calll __gnu_f2h_ieee -; CHECK-I686-NEXT: movw %ax, (%ebp) -; CHECK-I686-NEXT: movw %bx, 6(%ebp) -; CHECK-I686-NEXT: movw %di, 4(%ebp) -; CHECK-I686-NEXT: movw %si, 2(%ebp) -; CHECK-I686-NEXT: addl $44, %esp -; CHECK-I686-NEXT: popl %esi -; CHECK-I686-NEXT: popl %edi -; CHECK-I686-NEXT: popl %ebx -; CHECK-I686-NEXT: popl %ebp -; CHECK-I686-NEXT: retl 
+define void @test_trunc32_vec4(<4 x float> %a, <4 x half>* %p) { +; CHECK-LABEL: test_trunc32_vec4: + +; CHECK-LIBCALL: callq __gnu_f2h_ieee +; CHECK-LIBCALL: callq __gnu_f2h_ieee +; CHECK-LIBCALL: callq __gnu_f2h_ieee +; CHECK-LIBCALL: callq __gnu_f2h_ieee +; CHECK-F16C: vcvtps2ph +; CHECK-F16C: vcvtps2ph +; CHECK-F16C: vcvtps2ph +; CHECK-F16C: vcvtps2ph +; CHECK: movw +; CHECK: movw +; CHECK: movw +; CHECK: movw %v = fptrunc <4 x float> %a to <4 x half> store <4 x half> %v, <4 x half>* %p ret void } -define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) #0 { -; NOF16-BWINSTS-LABEL: test_trunc64_vec4: -; NOF16-BWINSTS: # BB#0: -; NOF16-BWINSTS-NEXT: pushq %rbp -; NOF16-BWINSTS-NEXT: pushq %r15 -; NOF16-BWINSTS-NEXT: pushq %r14 -; NOF16-BWINSTS-NEXT: pushq %rbx -; NOF16-BWINSTS-NEXT: subq $40, %rsp -; NOF16-BWINSTS-NEXT: movq %rdi, %rbx -; NOF16-BWINSTS-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill -; NOF16-BWINSTS-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill -; NOF16-BWINSTS-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] -; NOF16-BWINSTS-NEXT: callq __truncdfhf2 -; NOF16-BWINSTS-NEXT: movl %eax, %r14d -; NOF16-BWINSTS-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload -; NOF16-BWINSTS-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] -; NOF16-BWINSTS-NEXT: callq __truncdfhf2 -; NOF16-BWINSTS-NEXT: movl %eax, %r15d -; NOF16-BWINSTS-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload -; NOF16-BWINSTS-NEXT: callq __truncdfhf2 -; NOF16-BWINSTS-NEXT: movl %eax, %ebp -; NOF16-BWINSTS-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload -; NOF16-BWINSTS-NEXT: callq __truncdfhf2 -; NOF16-BWINSTS-NEXT: movw %ax, 4(%rbx) -; NOF16-BWINSTS-NEXT: movw %bp, (%rbx) -; NOF16-BWINSTS-NEXT: movw %r15w, 6(%rbx) -; NOF16-BWINSTS-NEXT: movw %r14w, 2(%rbx) -; NOF16-BWINSTS-NEXT: addq $40, %rsp -; NOF16-BWINSTS-NEXT: popq %rbx -; NOF16-BWINSTS-NEXT: popq %r14 -; NOF16-BWINSTS-NEXT: popq %r15 -; NOF16-BWINSTS-NEXT: popq %rbp -; NOF16-BWINSTS-NEXT: retq -; -; BWOFF-LABEL: test_trunc64_vec4: -; BWOFF: # 
BB#0: -; BWOFF-NEXT: pushq %rbp -; BWOFF-NEXT: pushq %r15 -; BWOFF-NEXT: pushq %r14 -; BWOFF-NEXT: pushq %rbx -; BWOFF-NEXT: subq $40, %rsp -; BWOFF-NEXT: movq %rdi, %rbx -; BWOFF-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill -; BWOFF-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill -; BWOFF-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] -; BWOFF-NEXT: callq __truncdfhf2 -; BWOFF-NEXT: movw %ax, %r14w -; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload -; BWOFF-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] -; BWOFF-NEXT: callq __truncdfhf2 -; BWOFF-NEXT: movw %ax, %r15w -; BWOFF-NEXT: movaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload -; BWOFF-NEXT: callq __truncdfhf2 -; BWOFF-NEXT: movw %ax, %bp -; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload -; BWOFF-NEXT: callq __truncdfhf2 -; BWOFF-NEXT: movw %ax, 4(%rbx) -; BWOFF-NEXT: movw %bp, (%rbx) -; BWOFF-NEXT: movw %r15w, 6(%rbx) -; BWOFF-NEXT: movw %r14w, 2(%rbx) -; BWOFF-NEXT: addq $40, %rsp -; BWOFF-NEXT: popq %rbx -; BWOFF-NEXT: popq %r14 -; BWOFF-NEXT: popq %r15 -; BWOFF-NEXT: popq %rbp -; BWOFF-NEXT: retq -; -; CHECK-F16C-LABEL: test_trunc64_vec4: -; CHECK-F16C: # BB#0: -; CHECK-F16C-NEXT: pushq %rbp -; CHECK-F16C-NEXT: pushq %r15 -; CHECK-F16C-NEXT: pushq %r14 -; CHECK-F16C-NEXT: pushq %rbx -; CHECK-F16C-NEXT: subq $88, %rsp -; CHECK-F16C-NEXT: movq %rdi, %rbx -; CHECK-F16C-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill -; CHECK-F16C-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; CHECK-F16C-NEXT: vzeroupper -; CHECK-F16C-NEXT: callq __truncdfhf2 -; CHECK-F16C-NEXT: movl %eax, %r14d -; CHECK-F16C-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload -; CHECK-F16C-NEXT: vextractf128 $1, %ymm0, %xmm0 -; CHECK-F16C-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill -; CHECK-F16C-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; CHECK-F16C-NEXT: vzeroupper -; CHECK-F16C-NEXT: callq __truncdfhf2 -; CHECK-F16C-NEXT: movl %eax, %r15d -; CHECK-F16C-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload -; CHECK-F16C-NEXT: # 
kill: %XMM0 %XMM0 %YMM0 -; CHECK-F16C-NEXT: vzeroupper -; CHECK-F16C-NEXT: callq __truncdfhf2 -; CHECK-F16C-NEXT: movl %eax, %ebp -; CHECK-F16C-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload -; CHECK-F16C-NEXT: callq __truncdfhf2 -; CHECK-F16C-NEXT: movw %ax, 4(%rbx) -; CHECK-F16C-NEXT: movw %bp, (%rbx) -; CHECK-F16C-NEXT: movw %r15w, 6(%rbx) -; CHECK-F16C-NEXT: movw %r14w, 2(%rbx) -; CHECK-F16C-NEXT: addq $88, %rsp -; CHECK-F16C-NEXT: popq %rbx -; CHECK-F16C-NEXT: popq %r14 -; CHECK-F16C-NEXT: popq %r15 -; CHECK-F16C-NEXT: popq %rbp -; CHECK-F16C-NEXT: retq -; -; CHECK-I686-LABEL: test_trunc64_vec4: -; CHECK-I686: # BB#0: -; CHECK-I686-NEXT: pushl %ebp -; CHECK-I686-NEXT: pushl %ebx -; CHECK-I686-NEXT: pushl %edi -; CHECK-I686-NEXT: pushl %esi -; CHECK-I686-NEXT: subl $60, %esp -; CHECK-I686-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) # 16-byte Spill -; CHECK-I686-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) # 16-byte Spill -; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; CHECK-I686-NEXT: movlps %xmm0, (%esp) -; CHECK-I686-NEXT: calll __truncdfhf2 -; CHECK-I686-NEXT: movw %ax, %si -; CHECK-I686-NEXT: movapd {{[0-9]+}}(%esp), %xmm0 # 16-byte Reload -; CHECK-I686-NEXT: movhpd %xmm0, (%esp) -; CHECK-I686-NEXT: calll __truncdfhf2 -; CHECK-I686-NEXT: movw %ax, %di -; CHECK-I686-NEXT: movaps {{[0-9]+}}(%esp), %xmm0 # 16-byte Reload -; CHECK-I686-NEXT: movlps %xmm0, (%esp) -; CHECK-I686-NEXT: calll __truncdfhf2 -; CHECK-I686-NEXT: movw %ax, %bx -; CHECK-I686-NEXT: movapd {{[0-9]+}}(%esp), %xmm0 # 16-byte Reload -; CHECK-I686-NEXT: movhpd %xmm0, (%esp) -; CHECK-I686-NEXT: calll __truncdfhf2 -; CHECK-I686-NEXT: movw %ax, 6(%ebp) -; CHECK-I686-NEXT: movw %bx, 4(%ebp) -; CHECK-I686-NEXT: movw %di, 2(%ebp) -; CHECK-I686-NEXT: movw %si, (%ebp) -; CHECK-I686-NEXT: addl $60, %esp -; CHECK-I686-NEXT: popl %esi -; CHECK-I686-NEXT: popl %edi -; CHECK-I686-NEXT: popl %ebx -; CHECK-I686-NEXT: popl %ebp -; CHECK-I686-NEXT: retl +define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* 
%p) { +; CHECK-LABEL: test_trunc64_vec4: +; CHECK: callq __truncdfhf2 +; CHECK: callq __truncdfhf2 +; CHECK: callq __truncdfhf2 +; CHECK: callq __truncdfhf2 +; CHECK: movw +; CHECK: movw +; CHECK: movw +; CHECK: movw %v = fptrunc <4 x double> %a to <4 x half> store <4 x half> %v, <4 x half>* %p ret void @@ -836,99 +268,44 @@ define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) #0 { declare float @test_floatret(); +; On i686, if SSE2 is available, the return value from test_floatret is loaded +; to f80 and then rounded to f32. The DAG combiner should not combine this +; fp_round and the subsequent fptrunc from float to half. define half @test_f80trunc_nodagcombine() #0 { -; CHECK-LIBCALL-LABEL: test_f80trunc_nodagcombine: -; CHECK-LIBCALL: # BB#0: -; CHECK-LIBCALL-NEXT: pushq %rax -; CHECK-LIBCALL-NEXT: callq test_floatret -; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee -; CHECK-LIBCALL-NEXT: movzwl %ax, %edi -; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee -; CHECK-LIBCALL-NEXT: popq %rax -; CHECK-LIBCALL-NEXT: retq -; -; CHECK-F16C-LABEL: test_f80trunc_nodagcombine: -; CHECK-F16C: # BB#0: -; CHECK-F16C-NEXT: pushq %rax -; CHECK-F16C-NEXT: callq test_floatret -; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 -; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 -; CHECK-F16C-NEXT: popq %rax -; CHECK-F16C-NEXT: retq -; -; CHECK-I686-LABEL: test_f80trunc_nodagcombine: -; CHECK-I686: # BB#0: -; CHECK-I686-NEXT: subl $12, %esp -; CHECK-I686-NEXT: calll test_floatret -; CHECK-I686-NEXT: fstps (%esp) -; CHECK-I686-NEXT: calll __gnu_f2h_ieee -; CHECK-I686-NEXT: movzwl %ax, %eax -; CHECK-I686-NEXT: movl %eax, (%esp) -; CHECK-I686-NEXT: calll __gnu_h2f_ieee -; CHECK-I686-NEXT: addl $12, %esp -; CHECK-I686-NEXT: retl +; CHECK-LABEL: test_f80trunc_nodagcombine: +; CHECK-I686-NOT: calll __truncxfhf2 %1 = call float @test_floatret() %2 = fptrunc float %1 to half ret half %2 } +; CHECK-LABEL: test_sitofp_fadd_i32: +; CHECK-LIBCALL-NEXT: pushq %rbx +; CHECK-LIBCALL-NEXT: subq $16, %rsp +; 
CHECK-LIBCALL-NEXT: movl %edi, %ebx +; CHECK-LIBCALL-NEXT: movzwl (%rsi), %edi +; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee +; CHECK-LIBCALL-NEXT: movss %xmm0, 12(%rsp) +; CHECK-LIBCALL-NEXT: cvtsi2ssl %ebx, %xmm0 +; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee +; CHECK-LIBCALL-NEXT: movzwl %ax, %edi +; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee +; CHECK-LIBCALL-NEXT: addss 12(%rsp), %xmm0 +; CHECK-LIBCALL-NEXT: addq $16, %rsp +; CHECK-LIBCALL-NEXT: popq %rbx +; CHECK-LIBCALL-NEXT: retq +; CHECK-F16C-NEXT: movswl (%rsi), %eax +; CHECK-F16C-NEXT: vmovd %eax, %xmm0 +; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 +; CHECK-F16C-NEXT: vcvtsi2ssl %edi, %xmm1, %xmm1 +; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm1, %xmm1 +; CHECK-F16C-NEXT: vcvtph2ps %xmm1, %xmm1 +; CHECK-F16C-NEXT: vaddss %xmm1, %xmm0, %xmm0 +; CHECK-F16C-NEXT: retq define float @test_sitofp_fadd_i32(i32 %a, half* %b) #0 { -; CHECK-LIBCALL-LABEL: test_sitofp_fadd_i32: -; CHECK-LIBCALL: # BB#0: -; CHECK-LIBCALL-NEXT: pushq %rbx -; CHECK-LIBCALL-NEXT: subq $16, %rsp -; CHECK-LIBCALL-NEXT: movl %edi, %ebx -; CHECK-LIBCALL-NEXT: movzwl (%rsi), %edi -; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee -; CHECK-LIBCALL-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) # 4-byte Spill -; CHECK-LIBCALL-NEXT: cvtsi2ssl %ebx, %xmm0 -; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee -; CHECK-LIBCALL-NEXT: movzwl %ax, %edi -; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee -; CHECK-LIBCALL-NEXT: addss {{[0-9]+}}(%rsp), %xmm0 # 4-byte Folded Reload -; CHECK-LIBCALL-NEXT: addq $16, %rsp -; CHECK-LIBCALL-NEXT: popq %rbx -; CHECK-LIBCALL-NEXT: retq -; -; CHECK-F16C-LABEL: test_sitofp_fadd_i32: -; CHECK-F16C: # BB#0: -; CHECK-F16C-NEXT: movswl (%rsi), %eax -; CHECK-F16C-NEXT: vmovd %eax, %xmm0 -; CHECK-F16C-NEXT: vcvtsi2ssl %edi, %xmm1, %xmm1 -; CHECK-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 -; CHECK-F16C-NEXT: vcvtps2ph $4, %xmm1, %xmm1 -; CHECK-F16C-NEXT: vcvtph2ps %xmm1, %xmm1 -; CHECK-F16C-NEXT: vaddss %xmm1, %xmm0, %xmm0 -; CHECK-F16C-NEXT: retq -; -; CHECK-I686-LABEL: 
test_sitofp_fadd_i32: -; CHECK-I686: # BB#0: -; CHECK-I686-NEXT: subl $28, %esp -; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-I686-NEXT: movzwl (%eax), %eax -; CHECK-I686-NEXT: movl %eax, (%esp) -; CHECK-I686-NEXT: calll __gnu_h2f_ieee -; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) -; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-I686-NEXT: movss %xmm0, {{[0-9]+}}(%esp) # 4-byte Spill -; CHECK-I686-NEXT: xorps %xmm0, %xmm0 -; CHECK-I686-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0 -; CHECK-I686-NEXT: movss %xmm0, (%esp) -; CHECK-I686-NEXT: calll __gnu_f2h_ieee -; CHECK-I686-NEXT: movzwl %ax, %eax -; CHECK-I686-NEXT: movl %eax, (%esp) -; CHECK-I686-NEXT: calll __gnu_h2f_ieee -; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) -; CHECK-I686-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # 4-byte Reload -; CHECK-I686-NEXT: # xmm0 = mem[0],zero,zero,zero -; CHECK-I686-NEXT: addss {{[0-9]+}}(%esp), %xmm0 -; CHECK-I686-NEXT: movss %xmm0, {{[0-9]+}}(%esp) -; CHECK-I686-NEXT: flds {{[0-9]+}}(%esp) -; CHECK-I686-NEXT: addl $28, %esp -; CHECK-I686-NEXT: retl %tmp0 = load half, half* %b %tmp1 = sitofp i32 %a to half %tmp2 = fadd half %tmp0, %tmp1 diff --git a/test/CodeGen/X86/illegal-bitfield-loadstore.ll b/test/CodeGen/X86/illegal-bitfield-loadstore.ll index 54214764c9a..ceb46571190 100644 --- a/test/CodeGen/X86/illegal-bitfield-loadstore.ll +++ b/test/CodeGen/X86/illegal-bitfield-loadstore.ll @@ -112,23 +112,23 @@ define void @i56_and_or(i56* %a) { define void @i56_insert_bit(i56* %a, i1 zeroext %bit) { ; CHECK-LABEL: i56_insert_bit: ; CHECK: # BB#0: -; CHECK-NEXT: movzwl 4(%rdi), %eax -; CHECK-NEXT: movzbl 6(%rdi), %ecx -; CHECK-NEXT: movl (%rdi), %edx -; CHECK-NEXT: movb %cl, 6(%rdi) -; CHECK-NEXT: movzbl %sil, %esi -; CHECK-NEXT: # kill: %ECX %ECX %RCX %RCX -; CHECK-NEXT: shll $16, %ecx -; CHECK-NEXT: orl %eax, %ecx -; CHECK-NEXT: shlq $32, %rcx -; CHECK-NEXT: orq %rcx, %rdx -; CHECK-NEXT: shlq $13, %rsi -; CHECK-NEXT: movabsq $72057594037919743, %rax # imm = 
0xFFFFFFFFFFDFFF -; CHECK-NEXT: andq %rdx, %rax -; CHECK-NEXT: orq %rsi, %rax -; CHECK-NEXT: movl %eax, (%rdi) -; CHECK-NEXT: shrq $32, %rax -; CHECK-NEXT: movw %ax, 4(%rdi) +; CHECK-NEXT: movzbl %sil, %eax +; CHECK-NEXT: movzwl 4(%rdi), %ecx +; CHECK-NEXT: movzbl 6(%rdi), %edx +; CHECK-NEXT: movl (%rdi), %esi +; CHECK-NEXT: movb %dl, 6(%rdi) +; CHECK-NEXT: # kill: %EDX %EDX %RDX %RDX +; CHECK-NEXT: shll $16, %edx +; CHECK-NEXT: orl %ecx, %edx +; CHECK-NEXT: shlq $32, %rdx +; CHECK-NEXT: orq %rdx, %rsi +; CHECK-NEXT: shlq $13, %rax +; CHECK-NEXT: movabsq $72057594037919743, %rcx # imm = 0xFFFFFFFFFFDFFF +; CHECK-NEXT: andq %rsi, %rcx +; CHECK-NEXT: orq %rax, %rcx +; CHECK-NEXT: movl %ecx, (%rdi) +; CHECK-NEXT: shrq $32, %rcx +; CHECK-NEXT: movw %cx, 4(%rdi) ; CHECK-NEXT: retq %extbit = zext i1 %bit to i56 %b = load i56, i56* %a, align 1 diff --git a/test/CodeGen/X86/mul-constant-i32.ll b/test/CodeGen/X86/mul-constant-i32.ll index a8b594904b6..d545b477e10 100644 --- a/test/CodeGen/X86/mul-constant-i32.ll +++ b/test/CodeGen/X86/mul-constant-i32.ll @@ -17,7 +17,7 @@ define i32 @test_mul_by_1(i32 %x) { ; X64-HSW-LABEL: test_mul_by_1: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_1: ; X64-JAG: # BB#0: @@ -32,7 +32,7 @@ define i32 @test_mul_by_1(i32 %x) { ; HSW-NOOPT-LABEL: test_mul_by_1: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_1: ; JAG-NOOPT: # BB#0: @@ -63,7 +63,7 @@ define i32 @test_mul_by_2(i32 %x) { ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_2: ; X64-JAG: # BB#0: @@ -81,7 +81,7 @@ define i32 
@test_mul_by_2(i32 %x) { ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI ; HSW-NOOPT-NEXT: leal (%rdi,%rdi), %eax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_2: ; JAG-NOOPT: # BB#0: @@ -114,7 +114,7 @@ define i32 @test_mul_by_3(i32 %x) { ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_3: ; X64-JAG: # BB#0: @@ -131,7 +131,7 @@ define i32 @test_mul_by_3(i32 %x) { ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI ; HSW-NOOPT-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_3: ; JAG-NOOPT: # BB#0: @@ -165,7 +165,7 @@ define i32 @test_mul_by_4(i32 %x) { ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_4: ; X64-JAG: # BB#0: @@ -183,7 +183,7 @@ define i32 @test_mul_by_4(i32 %x) { ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI ; HSW-NOOPT-NEXT: leal (,%rdi,4), %eax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_4: ; JAG-NOOPT: # BB#0: @@ -216,7 +216,7 @@ define i32 @test_mul_by_5(i32 %x) { ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_5: ; X64-JAG: # BB#0: @@ -233,7 +233,7 @@ define i32 @test_mul_by_5(i32 %x) { ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI ; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # 
sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_5: ; JAG-NOOPT: # BB#0: @@ -269,7 +269,7 @@ define i32 @test_mul_by_6(i32 %x) { ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25] ; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_6: ; X64-JAG: # BB#0: @@ -285,8 +285,8 @@ define i32 @test_mul_by_6(i32 %x) { ; ; HSW-NOOPT-LABEL: test_mul_by_6: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $6, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: imull $6, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_6: ; JAG-NOOPT: # BB#0: @@ -321,7 +321,7 @@ define i32 @test_mul_by_7(i32 %x) { ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] ; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_7: ; X64-JAG: # BB#0: @@ -337,8 +337,8 @@ define i32 @test_mul_by_7(i32 %x) { ; ; HSW-NOOPT-LABEL: test_mul_by_7: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $7, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: imull $7, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_7: ; JAG-NOOPT: # BB#0: @@ -371,7 +371,7 @@ define i32 @test_mul_by_8(i32 %x) { ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_8: ; X64-JAG: # BB#0: @@ -389,7 +389,7 @@ define i32 @test_mul_by_8(i32 %x) { ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI ; 
HSW-NOOPT-NEXT: leal (,%rdi,8), %eax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_8: ; JAG-NOOPT: # BB#0: @@ -422,7 +422,7 @@ define i32 @test_mul_by_9(i32 %x) { ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_9: ; X64-JAG: # BB#0: @@ -439,7 +439,7 @@ define i32 @test_mul_by_9(i32 %x) { ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: # kill: %EDI %EDI %RDI ; HSW-NOOPT-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_9: ; JAG-NOOPT: # BB#0: @@ -475,7 +475,7 @@ define i32 @test_mul_by_10(i32 %x) { ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25] ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_10: ; X64-JAG: # BB#0: @@ -491,8 +491,8 @@ define i32 @test_mul_by_10(i32 %x) { ; ; HSW-NOOPT-LABEL: test_mul_by_10: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $10, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: imull $10, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_10: ; JAG-NOOPT: # BB#0: @@ -527,7 +527,7 @@ define i32 @test_mul_by_11(i32 %x) { ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rax,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_11: ; X64-JAG: # BB#0: @@ -543,8 +543,8 @@ define i32 @test_mul_by_11(i32 %x) { ; ; HSW-NOOPT-LABEL: test_mul_by_11: ; HSW-NOOPT: # BB#0: 
-; HSW-NOOPT-NEXT: imull $11, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: imull $11, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_11: ; JAG-NOOPT: # BB#0: @@ -575,9 +575,9 @@ define i32 @test_mul_by_12(i32 %x) { ; X64-HSW-LABEL: test_mul_by_12: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI -; X64-HSW-NEXT: shll $2, %edi # sched: [1:1.00] +; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_12: ; X64-JAG: # BB#0: @@ -593,8 +593,8 @@ define i32 @test_mul_by_12(i32 %x) { ; ; HSW-NOOPT-LABEL: test_mul_by_12: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $12, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: imull $12, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_12: ; JAG-NOOPT: # BB#0: @@ -629,7 +629,7 @@ define i32 @test_mul_by_13(i32 %x) { ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_13: ; X64-JAG: # BB#0: @@ -645,8 +645,8 @@ define i32 @test_mul_by_13(i32 %x) { ; ; HSW-NOOPT-LABEL: test_mul_by_13: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $13, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: imull $13, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_13: ; JAG-NOOPT: # BB#0: @@ -681,7 +681,7 @@ define i32 @test_mul_by_14(i32 %x) { ; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: addl %edi, 
%eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_14: ; X64-JAG: # BB#0: @@ -698,8 +698,8 @@ define i32 @test_mul_by_14(i32 %x) { ; ; HSW-NOOPT-LABEL: test_mul_by_14: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $14, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: imull $14, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_14: ; JAG-NOOPT: # BB#0: @@ -732,7 +732,7 @@ define i32 @test_mul_by_15(i32 %x) { ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_15: ; X64-JAG: # BB#0: @@ -748,8 +748,8 @@ define i32 @test_mul_by_15(i32 %x) { ; ; HSW-NOOPT-LABEL: test_mul_by_15: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $15, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: imull $15, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_15: ; JAG-NOOPT: # BB#0: @@ -780,9 +780,9 @@ define i32 @test_mul_by_16(i32 %x) { ; ; X64-HSW-LABEL: test_mul_by_16: ; X64-HSW: # BB#0: -; X64-HSW-NEXT: shll $4, %edi # sched: [1:1.00] +; X64-HSW-NEXT: shll $4, %edi # sched: [1:0.50] ; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_16: ; X64-JAG: # BB#0: @@ -798,9 +798,9 @@ define i32 @test_mul_by_16(i32 %x) { ; ; HSW-NOOPT-LABEL: test_mul_by_16: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: shll $4, %edi # sched: [1:1.00] +; HSW-NOOPT-NEXT: shll $4, %edi # sched: [1:0.50] ; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: 
[1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_16: ; JAG-NOOPT: # BB#0: @@ -836,9 +836,9 @@ define i32 @test_mul_by_17(i32 %x) { ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: shll $4, %eax # sched: [1:1.00] +; X64-HSW-NEXT: shll $4, %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rax,%rdi), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_17: ; X64-JAG: # BB#0: @@ -855,8 +855,8 @@ define i32 @test_mul_by_17(i32 %x) { ; ; HSW-NOOPT-LABEL: test_mul_by_17: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $17, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: imull $17, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_17: ; JAG-NOOPT: # BB#0: @@ -892,7 +892,7 @@ define i32 @test_mul_by_18(i32 %x) { ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: addl %edi, %edi # sched: [1:0.25] ; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_18: ; X64-JAG: # BB#0: @@ -908,8 +908,8 @@ define i32 @test_mul_by_18(i32 %x) { ; ; HSW-NOOPT-LABEL: test_mul_by_18: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $18, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: imull $18, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_18: ; JAG-NOOPT: # BB#0: @@ -944,9 +944,9 @@ define i32 @test_mul_by_19(i32 %x) { ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: shll $2, %eax # sched: [1:1.00] +; X64-HSW-NEXT: shll $2, %eax # sched: [1:0.50] ; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # 
sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_19: ; X64-JAG: # BB#0: @@ -963,8 +963,8 @@ define i32 @test_mul_by_19(i32 %x) { ; ; HSW-NOOPT-LABEL: test_mul_by_19: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $19, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: imull $19, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_19: ; JAG-NOOPT: # BB#0: @@ -995,9 +995,9 @@ define i32 @test_mul_by_20(i32 %x) { ; X64-HSW-LABEL: test_mul_by_20: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI -; X64-HSW-NEXT: shll $2, %edi # sched: [1:1.00] +; X64-HSW-NEXT: shll $2, %edi # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_20: ; X64-JAG: # BB#0: @@ -1013,8 +1013,8 @@ define i32 @test_mul_by_20(i32 %x) { ; ; HSW-NOOPT-LABEL: test_mul_by_20: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $20, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: imull $20, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_20: ; JAG-NOOPT: # BB#0: @@ -1049,7 +1049,7 @@ define i32 @test_mul_by_21(i32 %x) { ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_21: ; X64-JAG: # BB#0: @@ -1065,8 +1065,8 @@ define i32 @test_mul_by_21(i32 %x) { ; ; HSW-NOOPT-LABEL: test_mul_by_21: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $21, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: imull $21, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_21: ; JAG-NOOPT: # BB#0: @@ -1101,7 
+1101,7 @@ define i32 @test_mul_by_22(i32 %x) { ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rax,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_22: ; X64-JAG: # BB#0: @@ -1118,8 +1118,8 @@ define i32 @test_mul_by_22(i32 %x) { ; ; HSW-NOOPT-LABEL: test_mul_by_22: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $22, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: imull $22, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_22: ; JAG-NOOPT: # BB#0: @@ -1152,9 +1152,9 @@ define i32 @test_mul_by_23(i32 %x) { ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: shll $3, %eax # sched: [1:1.00] +; X64-HSW-NEXT: shll $3, %eax # sched: [1:0.50] ; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_23: ; X64-JAG: # BB#0: @@ -1171,8 +1171,8 @@ define i32 @test_mul_by_23(i32 %x) { ; ; HSW-NOOPT-LABEL: test_mul_by_23: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $23, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: imull $23, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_23: ; JAG-NOOPT: # BB#0: @@ -1203,9 +1203,9 @@ define i32 @test_mul_by_24(i32 %x) { ; X64-HSW-LABEL: test_mul_by_24: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI -; X64-HSW-NEXT: shll $3, %edi # sched: [1:1.00] +; X64-HSW-NEXT: shll $3, %edi # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rdi,%rdi,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_24: ; X64-JAG: # 
BB#0: @@ -1221,8 +1221,8 @@ define i32 @test_mul_by_24(i32 %x) { ; ; HSW-NOOPT-LABEL: test_mul_by_24: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $24, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: imull $24, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_24: ; JAG-NOOPT: # BB#0: @@ -1257,7 +1257,7 @@ define i32 @test_mul_by_25(i32 %x) { ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rax,%rax,4), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_25: ; X64-JAG: # BB#0: @@ -1273,8 +1273,8 @@ define i32 @test_mul_by_25(i32 %x) { ; ; HSW-NOOPT-LABEL: test_mul_by_25: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $25, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: imull $25, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_25: ; JAG-NOOPT: # BB#0: @@ -1311,7 +1311,7 @@ define i32 @test_mul_by_26(i32 %x) { ; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_26: ; X64-JAG: # BB#0: @@ -1328,8 +1328,8 @@ define i32 @test_mul_by_26(i32 %x) { ; ; HSW-NOOPT-LABEL: test_mul_by_26: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $26, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: imull $26, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_26: ; JAG-NOOPT: # BB#0: @@ -1362,7 +1362,7 @@ define i32 @test_mul_by_27(i32 %x) { ; X64-HSW-NEXT: # kill: %EDI %EDI %RDI ; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: 
[1:0.50] ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_27: ; X64-JAG: # BB#0: @@ -1378,8 +1378,8 @@ define i32 @test_mul_by_27(i32 %x) { ; ; HSW-NOOPT-LABEL: test_mul_by_27: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $27, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: imull $27, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_27: ; JAG-NOOPT: # BB#0: @@ -1416,7 +1416,7 @@ define i32 @test_mul_by_28(i32 %x) { ; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50] ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_28: ; X64-JAG: # BB#0: @@ -1433,8 +1433,8 @@ define i32 @test_mul_by_28(i32 %x) { ; ; HSW-NOOPT-LABEL: test_mul_by_28: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $28, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: imull $28, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_28: ; JAG-NOOPT: # BB#0: @@ -1471,7 +1471,7 @@ define i32 @test_mul_by_29(i32 %x) { ; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50] ; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] ; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_29: ; X64-JAG: # BB#0: @@ -1489,8 +1489,8 @@ define i32 @test_mul_by_29(i32 %x) { ; ; HSW-NOOPT-LABEL: test_mul_by_29: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $29, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: imull $29, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; 
JAG-NOOPT-LABEL: test_mul_by_29: ; JAG-NOOPT: # BB#0: @@ -1523,10 +1523,10 @@ define i32 @test_mul_by_30(i32 %x) { ; X64-HSW-LABEL: test_mul_by_30: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: shll $5, %eax # sched: [1:1.00] +; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50] ; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] ; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_30: ; X64-JAG: # BB#0: @@ -1543,8 +1543,8 @@ define i32 @test_mul_by_30(i32 %x) { ; ; HSW-NOOPT-LABEL: test_mul_by_30: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $30, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: imull $30, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_30: ; JAG-NOOPT: # BB#0: @@ -1576,9 +1576,9 @@ define i32 @test_mul_by_31(i32 %x) { ; X64-HSW-LABEL: test_mul_by_31: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: movl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: shll $5, %eax # sched: [1:1.00] +; X64-HSW-NEXT: shll $5, %eax # sched: [1:0.50] ; X64-HSW-NEXT: subl %edi, %eax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_31: ; X64-JAG: # BB#0: @@ -1594,8 +1594,8 @@ define i32 @test_mul_by_31(i32 %x) { ; ; HSW-NOOPT-LABEL: test_mul_by_31: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: imull $31, %edi, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: imull $31, %edi, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_31: ; JAG-NOOPT: # BB#0: @@ -1626,9 +1626,9 @@ define i32 @test_mul_by_32(i32 %x) { ; ; X64-HSW-LABEL: test_mul_by_32: ; X64-HSW: # BB#0: -; X64-HSW-NEXT: shll $5, %edi # sched: [1:1.00] +; X64-HSW-NEXT: shll $5, %edi # sched: [1:0.50] ; X64-HSW-NEXT: movl %edi, %eax # sched: 
[1:0.25] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_32: ; X64-JAG: # BB#0: @@ -1644,9 +1644,9 @@ define i32 @test_mul_by_32(i32 %x) { ; ; HSW-NOOPT-LABEL: test_mul_by_32: ; HSW-NOOPT: # BB#0: -; HSW-NOOPT-NEXT: shll $5, %edi # sched: [1:1.00] +; HSW-NOOPT-NEXT: shll $5, %edi # sched: [1:0.50] ; HSW-NOOPT-NEXT: movl %edi, %eax # sched: [1:0.25] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_32: ; JAG-NOOPT: # BB#0: @@ -1686,8 +1686,8 @@ define i32 @test_mul_spec(i32 %x) nounwind { ; X64-HSW-NEXT: addl $42, %ecx # sched: [1:0.25] ; X64-HSW-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; X64-HSW-NEXT: addl $2, %eax # sched: [1:0.25] -; X64-HSW-NEXT: imull %ecx, %eax # sched: [3:1.00] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: imull %ecx, %eax # sched: [4:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_spec: ; X64-JAG: # BB#0: @@ -1712,8 +1712,8 @@ define i32 @test_mul_spec(i32 %x) nounwind { ; HSW-NOOPT-NEXT: addl $42, %ecx # sched: [1:0.25] ; HSW-NOOPT-NEXT: leal (%rdi,%rdi,4), %eax # sched: [1:0.50] ; HSW-NOOPT-NEXT: addl $2, %eax # sched: [1:0.25] -; HSW-NOOPT-NEXT: imull %ecx, %eax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: imull %ecx, %eax # sched: [4:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_spec: ; JAG-NOOPT: # BB#0: diff --git a/test/CodeGen/X86/mul-constant-i64.ll b/test/CodeGen/X86/mul-constant-i64.ll index bf732f2f536..ea841c761c7 100644 --- a/test/CodeGen/X86/mul-constant-i64.ll +++ b/test/CodeGen/X86/mul-constant-i64.ll @@ -18,7 +18,7 @@ define i64 @test_mul_by_1(i64 %x) nounwind { ; X64-HSW-LABEL: test_mul_by_1: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_1: ; X64-JAG: # 
BB#0: @@ -34,7 +34,7 @@ define i64 @test_mul_by_1(i64 %x) nounwind { ; HSW-NOOPT-LABEL: test_mul_by_1: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_1: ; JAG-NOOPT: # BB#0: @@ -66,7 +66,7 @@ define i64 @test_mul_by_2(i64 %x) { ; X64-HSW-LABEL: test_mul_by_2: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_2: ; X64-JAG: # BB#0: @@ -84,7 +84,7 @@ define i64 @test_mul_by_2(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_2: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: leaq (%rdi,%rdi), %rax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_2: ; JAG-NOOPT: # BB#0: @@ -116,7 +116,7 @@ define i64 @test_mul_by_3(i64 %x) { ; X64-HSW-LABEL: test_mul_by_3: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_3: ; X64-JAG: # BB#0: @@ -134,7 +134,7 @@ define i64 @test_mul_by_3(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_3: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_3: ; JAG-NOOPT: # BB#0: @@ -166,7 +166,7 @@ define i64 @test_mul_by_4(i64 %x) { ; X64-HSW-LABEL: test_mul_by_4: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_4: ; X64-JAG: # BB#0: @@ -184,7 +184,7 @@ define i64 @test_mul_by_4(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_4: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: leaq (,%rdi,4), %rax # sched: [1:0.50] -; 
HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_4: ; JAG-NOOPT: # BB#0: @@ -216,7 +216,7 @@ define i64 @test_mul_by_5(i64 %x) { ; X64-HSW-LABEL: test_mul_by_5: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_5: ; X64-JAG: # BB#0: @@ -234,7 +234,7 @@ define i64 @test_mul_by_5(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_5: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_5: ; JAG-NOOPT: # BB#0: @@ -268,7 +268,7 @@ define i64 @test_mul_by_6(i64 %x) { ; X64-HSW: # BB#0: ; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25] ; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_6: ; X64-JAG: # BB#0: @@ -287,7 +287,7 @@ define i64 @test_mul_by_6(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_6: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $6, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_6: ; JAG-NOOPT: # BB#0: @@ -323,7 +323,7 @@ define i64 @test_mul_by_7(i64 %x) { ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] ; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_7: ; X64-JAG: # BB#0: @@ -342,7 +342,7 @@ define i64 @test_mul_by_7(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_7: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $7, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_7: ; JAG-NOOPT: # BB#0: 
@@ -375,7 +375,7 @@ define i64 @test_mul_by_8(i64 %x) { ; X64-HSW-LABEL: test_mul_by_8: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_8: ; X64-JAG: # BB#0: @@ -393,7 +393,7 @@ define i64 @test_mul_by_8(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_8: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: leaq (,%rdi,8), %rax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_8: ; JAG-NOOPT: # BB#0: @@ -425,7 +425,7 @@ define i64 @test_mul_by_9(i64 %x) { ; X64-HSW-LABEL: test_mul_by_9: ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_9: ; X64-JAG: # BB#0: @@ -443,7 +443,7 @@ define i64 @test_mul_by_9(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_9: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_9: ; JAG-NOOPT: # BB#0: @@ -477,7 +477,7 @@ define i64 @test_mul_by_10(i64 %x) { ; X64-HSW: # BB#0: ; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25] ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_10: ; X64-JAG: # BB#0: @@ -496,7 +496,7 @@ define i64 @test_mul_by_10(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_10: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $10, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_10: ; JAG-NOOPT: # BB#0: @@ -532,7 +532,7 @@ define i64 @test_mul_by_11(i64 %x) { ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: 
leaq (%rdi,%rax,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_11: ; X64-JAG: # BB#0: @@ -551,7 +551,7 @@ define i64 @test_mul_by_11(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_11: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $11, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_11: ; JAG-NOOPT: # BB#0: @@ -585,7 +585,7 @@ define i64 @test_mul_by_12(i64 %x) { ; X64-HSW: # BB#0: ; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_12: ; X64-JAG: # BB#0: @@ -604,7 +604,7 @@ define i64 @test_mul_by_12(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_12: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $12, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_12: ; JAG-NOOPT: # BB#0: @@ -640,7 +640,7 @@ define i64 @test_mul_by_13(i64 %x) { ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_13: ; X64-JAG: # BB#0: @@ -659,7 +659,7 @@ define i64 @test_mul_by_13(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_13: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $13, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_13: ; JAG-NOOPT: # BB#0: @@ -696,7 +696,7 @@ define i64 @test_mul_by_14(i64 %x) { ; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] -; 
X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_14: ; X64-JAG: # BB#0: @@ -716,7 +716,7 @@ define i64 @test_mul_by_14(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_14: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $14, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_14: ; JAG-NOOPT: # BB#0: @@ -751,7 +751,7 @@ define i64 @test_mul_by_15(i64 %x) { ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_15: ; X64-JAG: # BB#0: @@ -770,7 +770,7 @@ define i64 @test_mul_by_15(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_15: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $15, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_15: ; JAG-NOOPT: # BB#0: @@ -804,7 +804,7 @@ define i64 @test_mul_by_16(i64 %x) { ; X64-HSW: # BB#0: ; X64-HSW-NEXT: shlq $4, %rdi # sched: [1:0.50] ; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_16: ; X64-JAG: # BB#0: @@ -824,7 +824,7 @@ define i64 @test_mul_by_16(i64 %x) { ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: shlq $4, %rdi # sched: [1:0.50] ; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_16: ; JAG-NOOPT: # BB#0: @@ -864,7 +864,7 @@ define i64 @test_mul_by_17(i64 %x) { ; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: shlq $4, %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rax,%rdi), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: 
[1:1.00] ; ; X64-JAG-LABEL: test_mul_by_17: ; X64-JAG: # BB#0: @@ -884,7 +884,7 @@ define i64 @test_mul_by_17(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_17: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $17, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_17: ; JAG-NOOPT: # BB#0: @@ -920,7 +920,7 @@ define i64 @test_mul_by_18(i64 %x) { ; X64-HSW: # BB#0: ; X64-HSW-NEXT: addq %rdi, %rdi # sched: [1:0.25] ; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_18: ; X64-JAG: # BB#0: @@ -939,7 +939,7 @@ define i64 @test_mul_by_18(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_18: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $18, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_18: ; JAG-NOOPT: # BB#0: @@ -977,7 +977,7 @@ define i64 @test_mul_by_19(i64 %x) { ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: shlq $2, %rax # sched: [1:0.50] ; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_19: ; X64-JAG: # BB#0: @@ -997,7 +997,7 @@ define i64 @test_mul_by_19(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_19: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $19, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_19: ; JAG-NOOPT: # BB#0: @@ -1031,7 +1031,7 @@ define i64 @test_mul_by_20(i64 %x) { ; X64-HSW: # BB#0: ; X64-HSW-NEXT: shlq $2, %rdi # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_20: ; X64-JAG: # BB#0: @@ -1050,7 
+1050,7 @@ define i64 @test_mul_by_20(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_20: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $20, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_20: ; JAG-NOOPT: # BB#0: @@ -1086,7 +1086,7 @@ define i64 @test_mul_by_21(i64 %x) { ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_21: ; X64-JAG: # BB#0: @@ -1105,7 +1105,7 @@ define i64 @test_mul_by_21(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_21: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $21, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_21: ; JAG-NOOPT: # BB#0: @@ -1142,7 +1142,7 @@ define i64 @test_mul_by_22(i64 %x) { ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rax,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_22: ; X64-JAG: # BB#0: @@ -1162,7 +1162,7 @@ define i64 @test_mul_by_22(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_22: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $22, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_22: ; JAG-NOOPT: # BB#0: @@ -1199,7 +1199,7 @@ define i64 @test_mul_by_23(i64 %x) { ; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: shlq $3, %rax # sched: [1:0.50] ; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_23: ; X64-JAG: # BB#0: @@ -1219,7 +1219,7 @@ 
define i64 @test_mul_by_23(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_23: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $23, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_23: ; JAG-NOOPT: # BB#0: @@ -1253,7 +1253,7 @@ define i64 @test_mul_by_24(i64 %x) { ; X64-HSW: # BB#0: ; X64-HSW-NEXT: shlq $3, %rdi # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rdi,%rdi,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_24: ; X64-JAG: # BB#0: @@ -1272,7 +1272,7 @@ define i64 @test_mul_by_24(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_24: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $24, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_24: ; JAG-NOOPT: # BB#0: @@ -1308,7 +1308,7 @@ define i64 @test_mul_by_25(i64 %x) { ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rax,%rax,4), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_25: ; X64-JAG: # BB#0: @@ -1327,7 +1327,7 @@ define i64 @test_mul_by_25(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_25: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $25, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_25: ; JAG-NOOPT: # BB#0: @@ -1365,7 +1365,7 @@ define i64 @test_mul_by_26(i64 %x) { ; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_26: ; X64-JAG: # BB#0: @@ -1385,7 +1385,7 @@ define i64 @test_mul_by_26(i64 %x) { ; 
HSW-NOOPT-LABEL: test_mul_by_26: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $26, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_26: ; JAG-NOOPT: # BB#0: @@ -1420,7 +1420,7 @@ define i64 @test_mul_by_27(i64 %x) { ; X64-HSW: # BB#0: ; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_27: ; X64-JAG: # BB#0: @@ -1439,7 +1439,7 @@ define i64 @test_mul_by_27(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_27: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $27, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_27: ; JAG-NOOPT: # BB#0: @@ -1477,7 +1477,7 @@ define i64 @test_mul_by_28(i64 %x) { ; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50] ; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_28: ; X64-JAG: # BB#0: @@ -1497,7 +1497,7 @@ define i64 @test_mul_by_28(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_28: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $28, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_28: ; JAG-NOOPT: # BB#0: @@ -1536,7 +1536,7 @@ define i64 @test_mul_by_29(i64 %x) { ; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50] ; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_29: ; X64-JAG: # BB#0: @@ -1557,7 +1557,7 @@ define i64 @test_mul_by_29(i64 %x) { ; 
HSW-NOOPT-LABEL: test_mul_by_29: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $29, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_29: ; JAG-NOOPT: # BB#0: @@ -1596,7 +1596,7 @@ define i64 @test_mul_by_30(i64 %x) { ; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50] ; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_30: ; X64-JAG: # BB#0: @@ -1617,7 +1617,7 @@ define i64 @test_mul_by_30(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_30: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $30, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_30: ; JAG-NOOPT: # BB#0: @@ -1654,7 +1654,7 @@ define i64 @test_mul_by_31(i64 %x) { ; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] ; X64-HSW-NEXT: shlq $5, %rax # sched: [1:0.50] ; X64-HSW-NEXT: subq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_31: ; X64-JAG: # BB#0: @@ -1674,7 +1674,7 @@ define i64 @test_mul_by_31(i64 %x) { ; HSW-NOOPT-LABEL: test_mul_by_31: ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: imulq $31, %rdi, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_31: ; JAG-NOOPT: # BB#0: @@ -1709,7 +1709,7 @@ define i64 @test_mul_by_32(i64 %x) { ; X64-HSW: # BB#0: ; X64-HSW-NEXT: shlq $5, %rdi # sched: [1:0.50] ; X64-HSW-NEXT: movq %rdi, %rax # sched: [1:0.25] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_by_32: ; X64-JAG: # BB#0: @@ -1729,7 +1729,7 @@ define i64 @test_mul_by_32(i64 %x) { ; HSW-NOOPT: # BB#0: ; HSW-NOOPT-NEXT: shlq $5, %rdi # sched: 
[1:0.50] ; HSW-NOOPT-NEXT: movq %rdi, %rax # sched: [1:0.25] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_by_32: ; JAG-NOOPT: # BB#0: @@ -1793,7 +1793,7 @@ define i64 @test_mul_spec(i64 %x) nounwind { ; X64-HSW-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; X64-HSW-NEXT: addq $2, %rax # sched: [1:0.25] ; X64-HSW-NEXT: imulq %rcx, %rax # sched: [3:1.00] -; X64-HSW-NEXT: retq # sched: [2:1.00] +; X64-HSW-NEXT: retq # sched: [1:1.00] ; ; X64-JAG-LABEL: test_mul_spec: ; X64-JAG: # BB#0: @@ -1841,7 +1841,7 @@ define i64 @test_mul_spec(i64 %x) nounwind { ; HSW-NOOPT-NEXT: leaq (%rdi,%rdi,4), %rax # sched: [1:0.50] ; HSW-NOOPT-NEXT: addq $2, %rax # sched: [1:0.25] ; HSW-NOOPT-NEXT: imulq %rcx, %rax # sched: [3:1.00] -; HSW-NOOPT-NEXT: retq # sched: [2:1.00] +; HSW-NOOPT-NEXT: retq # sched: [1:1.00] ; ; JAG-NOOPT-LABEL: test_mul_spec: ; JAG-NOOPT: # BB#0: diff --git a/test/CodeGen/X86/pr32329.ll b/test/CodeGen/X86/pr32329.ll index e6e4ad7a713..1e2c48f6fc7 100644 --- a/test/CodeGen/X86/pr32329.ll +++ b/test/CodeGen/X86/pr32329.ll @@ -59,8 +59,8 @@ define void @foo() local_unnamed_addr { ; X86-NEXT: cmovnel %ecx, %esi ; X86-NEXT: cmpl %edx, %edi ; X86-NEXT: movl %ebp, var_50+4 -; X86-NEXT: setge var_205 ; X86-NEXT: movl %esi, var_50 +; X86-NEXT: setge var_205 ; X86-NEXT: imull %eax, %ebx ; X86-NEXT: movb %bl, var_218 ; X86-NEXT: popl %esi diff --git a/test/CodeGen/X86/recip-fastmath.ll b/test/CodeGen/X86/recip-fastmath.ll index e9e1ee5e742..16e261bf3c5 100644 --- a/test/CodeGen/X86/recip-fastmath.ll +++ b/test/CodeGen/X86/recip-fastmath.ll @@ -45,15 +45,15 @@ define float @f32_no_estimate(float %x) #0 { ; ; SANDY-LABEL: f32_no_estimate: ; SANDY: # BB#0: -; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [14:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] +; 
SANDY-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: f32_no_estimate: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [?:5.000000e-01] -; HASWELL-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [13:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] +; HASWELL-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; HASWELL-NO-FMA-LABEL: f32_no_estimate: ; HASWELL-NO-FMA: # BB#0: @@ -63,9 +63,9 @@ define float @f32_no_estimate(float %x) #0 { ; ; AVX512-LABEL: f32_no_estimate: ; AVX512: # BB#0: -; AVX512-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [?:5.000000e-01] -; AVX512-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [13:1.00] -; AVX512-NEXT: retq # sched: [2:1.00] +; AVX512-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] +; AVX512-NEXT: vdivss %xmm0, %xmm1, %xmm0 # sched: [12:1.00] +; AVX512-NEXT: retq # sched: [1:1.00] %div = fdiv fast float 1.0, %x ret float %div } @@ -113,18 +113,18 @@ define float @f32_one_step(float %x) #1 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] ; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: f32_one_step: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq 
# sched: [2:1.00] +; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 +; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; HASWELL-NO-FMA-LABEL: f32_one_step: ; HASWELL-NO-FMA: # BB#0: @@ -139,9 +139,9 @@ define float @f32_one_step(float %x) #1 { ; AVX512-LABEL: f32_one_step: ; AVX512: # BB#0: ; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 -; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50] -; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50] -; AVX512-NEXT: retq # sched: [2:1.00] +; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 +; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 +; AVX512-NEXT: retq # sched: [1:1.00] %div = fdiv fast float 1.0, %x ret float %div } @@ -207,7 +207,7 @@ define float @f32_two_step(float %x) #2 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50] ; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00] @@ -215,18 +215,18 @@ define float @f32_two_step(float %x) #2 { ; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: f32_two_step: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] ; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50] -; 
HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [5:0.50] -; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 +; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 +; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 +; HASWELL-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; HASWELL-NO-FMA-LABEL: f32_two_step: ; HASWELL-NO-FMA: # BB#0: @@ -245,13 +245,13 @@ define float @f32_two_step(float %x) #2 { ; AVX512-LABEL: f32_two_step: ; AVX512: # BB#0: ; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 -; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [?:5.000000e-01] +; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] ; AVX512-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50] -; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [5:0.50] -; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [5:0.50] -; AVX512-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [5:0.50] -; AVX512-NEXT: retq # sched: [2:1.00] +; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 +; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 +; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 +; AVX512-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 +; AVX512-NEXT: retq # sched: [1:1.00] %div = fdiv fast float 1.0, %x ret float %div } @@ -284,15 +284,15 @@ define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 { ; ; SANDY-LABEL: v4f32_no_estimate: ; SANDY: # BB#0: -; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] -; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [14:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovaps {{.*#+}} xmm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vdivps %xmm0, %xmm1, %xmm0 # 
sched: [12:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v4f32_no_estimate: ; HASWELL: # BB#0: -; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [?:5.000000e-01] -; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [13:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [4:0.50] +; HASWELL-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; HASWELL-NO-FMA-LABEL: v4f32_no_estimate: ; HASWELL-NO-FMA: # BB#0: @@ -302,9 +302,9 @@ define <4 x float> @v4f32_no_estimate(<4 x float> %x) #0 { ; ; AVX512-LABEL: v4f32_no_estimate: ; AVX512: # BB#0: -; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [?:5.000000e-01] -; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [13:1.00] -; AVX512-NEXT: retq # sched: [2:1.00] +; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %xmm1 # sched: [4:0.50] +; AVX512-NEXT: vdivps %xmm0, %xmm1, %xmm0 # sched: [12:1.00] +; AVX512-NEXT: retq # sched: [1:1.00] %div = fdiv fast <4 x float> , %x ret <4 x float> %div } @@ -350,21 +350,21 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 { ; ; SANDY-LABEL: v4f32_one_step: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v4f32_one_step: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: 
[?:5.000000e-01] -; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] +; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 +; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; HASWELL-NO-FMA-LABEL: v4f32_one_step: ; HASWELL-NO-FMA: # BB#0: @@ -379,17 +379,17 @@ define <4 x float> @v4f32_one_step(<4 x float> %x) #1 { ; KNL-LABEL: v4f32_one_step: ; KNL: # BB#0: ; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] -; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50] -; KNL-NEXT: retq # sched: [2:1.00] +; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] +; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 +; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 +; KNL-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: v4f32_one_step: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %xmm0, %xmm1 ; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0 -; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 +; SKX-NEXT: retq # sched: [1:1.00] %div = fdiv fast <4 x float> , %x ret <4 x float> %div } @@ -453,9 +453,9 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 { ; ; SANDY-LABEL: v4f32_two_step: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: 
[3:1.00] ; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00] @@ -463,18 +463,18 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 { ; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v4f32_two_step: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] +; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] ; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50] -; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 +; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 +; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 +; HASWELL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; HASWELL-NO-FMA-LABEL: v4f32_two_step: ; HASWELL-NO-FMA: # BB#0: @@ -493,24 +493,24 @@ define <4 x float> @v4f32_two_step(<4 x float> %x) #2 { ; KNL-LABEL: v4f32_two_step: ; KNL: # BB#0: ; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] +; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] ; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50] -; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50] -; KNL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50] -; KNL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50] -; 
KNL-NEXT: retq # sched: [2:1.00] +; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 +; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 +; KNL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 +; KNL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 +; KNL-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: v4f32_two_step: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %xmm0, %xmm1 -; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] +; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] ; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50] -; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50] -; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50] -; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 +; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 +; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 +; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 +; SKX-NEXT: retq # sched: [1:1.00] %div = fdiv fast <4 x float> , %x ret <4 x float> %div } @@ -546,15 +546,15 @@ define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 { ; ; SANDY-LABEL: v8f32_no_estimate: ; SANDY: # BB#0: -; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] -; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [29:3.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovaps {{.*#+}} ymm1 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] +; SANDY-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [12:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v8f32_no_estimate: ; HASWELL: # BB#0: ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00] -; HASWELL-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [21:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vdivps %ymm0, 
%ymm1, %ymm0 # sched: [19:2.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; HASWELL-NO-FMA-LABEL: v8f32_no_estimate: ; HASWELL-NO-FMA: # BB#0: @@ -565,8 +565,8 @@ define <8 x float> @v8f32_no_estimate(<8 x float> %x) #0 { ; AVX512-LABEL: v8f32_no_estimate: ; AVX512: # BB#0: ; AVX512-NEXT: vbroadcastss {{.*}}(%rip), %ymm1 # sched: [5:1.00] -; AVX512-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [21:2.00] -; AVX512-NEXT: retq # sched: [2:1.00] +; AVX512-NEXT: vdivps %ymm0, %ymm1, %ymm0 # sched: [19:2.00] +; AVX512-NEXT: retq # sched: [1:1.00] %div = fdiv fast <8 x float> , %x ret <8 x float> %div } @@ -621,19 +621,19 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v8f32_one_step: ; HASWELL: # BB#0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] +; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] -; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 +; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; HASWELL-NO-FMA-LABEL: v8f32_one_step: ; HASWELL-NO-FMA: # BB#0: @@ 
-647,18 +647,18 @@ define <8 x float> @v8f32_one_step(<8 x float> %x) #1 { ; ; KNL-LABEL: v8f32_one_step: ; KNL: # BB#0: -; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] +; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] ; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] -; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] -; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50] -; KNL-NEXT: retq # sched: [2:1.00] +; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 +; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 +; KNL-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: v8f32_one_step: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %ymm0, %ymm1 ; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0 -; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 +; SKX-NEXT: retq # sched: [1:1.00] %div = fdiv fast <8 x float> , %x ret <8 x float> %div } @@ -737,7 +737,7 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00] @@ -745,18 +745,18 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 { ; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; 
HASWELL-LABEL: v8f32_two_step: ; HASWELL: # BB#0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] +; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] -; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50] -; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 +; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 +; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 +; HASWELL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; HASWELL-NO-FMA-LABEL: v8f32_two_step: ; HASWELL-NO-FMA: # BB#0: @@ -774,25 +774,25 @@ define <8 x float> @v8f32_two_step(<8 x float> %x) #2 { ; ; KNL-LABEL: v8f32_two_step: ; KNL: # BB#0: -; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] +; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] ; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] -; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50] -; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50] -; KNL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50] -; KNL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50] -; KNL-NEXT: retq # sched: [2:1.00] +; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 +; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 +; KNL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 +; KNL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 +; KNL-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: v8f32_two_step: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %ymm0, %ymm1 ; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] -; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: 
[5:0.50] -; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50] -; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50] -; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 +; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 +; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 +; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 +; SKX-NEXT: retq # sched: [1:1.00] %div = fdiv fast <8 x float> , %x ret <8 x float> %div } diff --git a/test/CodeGen/X86/recip-fastmath2.ll b/test/CodeGen/X86/recip-fastmath2.ll index a3ac4596c07..440a6f0bef1 100644 --- a/test/CodeGen/X86/recip-fastmath2.ll +++ b/test/CodeGen/X86/recip-fastmath2.ll @@ -39,26 +39,26 @@ define float @f32_no_step_2(float %x) #3 { ; SANDY-LABEL: f32_no_step_2: ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: f32_no_step_2: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; HASWELL-NO-FMA-LABEL: f32_no_step_2: ; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00] +; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] +; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] ; ; AVX512-LABEL: f32_no_step_2: ; AVX512: # BB#0: ; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] -; AVX512-NEXT: retq # 
sched: [2:1.00] +; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] +; AVX512-NEXT: retq # sched: [1:1.00] %div = fdiv fast float 1234.0, %x ret float %div } @@ -110,39 +110,39 @@ define float @f32_one_step_2(float %x) #1 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] ; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: f32_one_step_2: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 +; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 +; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; HASWELL-NO-FMA-LABEL: f32_one_step_2: ; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [?:5.000000e-01] +; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] ; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # 
sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00] +; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] +; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] ; ; AVX512-LABEL: f32_one_step_2: ; AVX512: # BB#0: ; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 -; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50] -; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50] -; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] -; AVX512-NEXT: retq # sched: [2:1.00] +; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 +; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 +; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] +; AVX512-NEXT: retq # sched: [1:1.00] %div = fdiv fast float 3456.0, %x ret float %div } @@ -198,43 +198,43 @@ define float @f32_one_step_2_divs(float %x) #1 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] ; SANDY-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00] +; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: f32_one_step_2_divs: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: 
[5:0.50] -; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50] +; HASWELL-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 +; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 +; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50] ; HASWELL-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; HASWELL-NO-FMA-LABEL: f32_one_step_2_divs: ; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [?:5.000000e-01] +; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] ; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50] +; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50] ; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00] +; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] ; ; AVX512-LABEL: f32_one_step_2_divs: ; AVX512: # BB#0: ; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 -; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 # sched: [5:0.50] -; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 # sched: [5:0.50] -; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50] +; AVX512-NEXT: vfnmadd213ss {{.*}}(%rip), %xmm1, %xmm0 +; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm0 +; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50] ; AVX512-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; AVX512-NEXT: retq # sched: [2:1.00] +; AVX512-NEXT: retq # sched: [1:1.00] %div = fdiv fast float 3456.0, %x %div2 = fdiv fast float %div, %x ret float 
%div2 @@ -305,7 +305,7 @@ define float @f32_two_step_2(float %x) #2 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50] ; SANDY-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00] @@ -313,26 +313,26 @@ define float @f32_two_step_2(float %x) #2 { ; SANDY-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: f32_two_step_2: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] ; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [5:0.50] -; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 +; HASWELL-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 +; HASWELL-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 +; HASWELL-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 +; HASWELL-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] +; HASWELL-NEXT: retq # sched: 
[1:1.00] ; ; HASWELL-NO-FMA-LABEL: f32_two_step_2: ; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA-NEXT: vrcpss %xmm0, %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vmulss %xmm1, %xmm0, %xmm2 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [?:5.000000e-01] +; HASWELL-NO-FMA-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero sched: [4:0.50] ; HASWELL-NO-FMA-NEXT: vsubss %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vmulss %xmm2, %xmm1, %xmm2 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vaddss %xmm2, %xmm1, %xmm1 # sched: [3:1.00] @@ -340,20 +340,20 @@ define float @f32_two_step_2(float %x) #2 { ; HASWELL-NO-FMA-NEXT: vsubss %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vmulss %xmm0, %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vaddss %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00] +; HASWELL-NO-FMA-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] +; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] ; ; AVX512-LABEL: f32_two_step_2: ; AVX512: # BB#0: ; AVX512-NEXT: vrcp14ss %xmm0, %xmm0, %xmm1 -; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [?:5.000000e-01] +; AVX512-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] ; AVX512-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 # sched: [5:0.50] -; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 # sched: [5:0.50] -; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 # sched: [5:0.50] -; AVX512-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 # sched: [5:0.50] -; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] -; AVX512-NEXT: retq # sched: [2:1.00] +; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm0, %xmm3 +; AVX512-NEXT: vfmadd132ss %xmm1, %xmm1, %xmm3 +; AVX512-NEXT: vfnmadd213ss %xmm2, %xmm3, %xmm0 +; AVX512-NEXT: vfmadd132ss %xmm3, %xmm3, %xmm0 +; 
AVX512-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] +; AVX512-NEXT: retq # sched: [1:1.00] %div = fdiv fast float 6789.0, %x ret float %div } @@ -403,51 +403,51 @@ define <4 x float> @v4f32_one_step2(<4 x float> %x) #1 { ; ; SANDY-LABEL: v4f32_one_step2: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v4f32_one_step2: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] -; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] +; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 +; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 +; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; HASWELL-NO-FMA-LABEL: v4f32_one_step2: ; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vbroadcastss 
{{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] +; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] ; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00] +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] +; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] ; ; KNL-LABEL: v4f32_one_step2: ; KNL: # BB#0: ; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] -; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50] -; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] -; KNL-NEXT: retq # sched: [2:1.00] +; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] +; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 +; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 +; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] +; KNL-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: v4f32_one_step2: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %xmm0, %xmm1 ; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0 -; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50] -; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 +; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] +; SKX-NEXT: retq # sched: [1:1.00] %div = fdiv fast <4 x float> , %x ret <4 x float> %div } @@ -501,56 +501,56 @@ define <4 x float> @v4f32_one_step_2_divs(<4 x float> %x) #1 { ; ; SANDY-LABEL: v4f32_one_step_2_divs: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps 
%xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} xmm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [11:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v4f32_one_step_2_divs: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] -; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50] +; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] +; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 +; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 +; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50] ; HASWELL-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; HASWELL-NO-FMA-LABEL: v4f32_one_step_2_divs: ; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] +; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] ; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm2, %xmm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: 
vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50] +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50] ; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00] +; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] ; ; KNL-LABEL: v4f32_one_step_2_divs: ; KNL: # BB#0: ; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] -; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 # sched: [5:0.50] -; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50] -; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50] +; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] +; KNL-NEXT: vfnmadd213ps %xmm2, %xmm1, %xmm0 +; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 +; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50] ; KNL-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; KNL-NEXT: retq # sched: [2:1.00] +; KNL-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: v4f32_one_step_2_divs: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %xmm0, %xmm1 ; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to4}, %xmm1, %xmm0 -; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 # sched: [5:0.50] -; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [5:0.50] +; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm0 +; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm1 # sched: [9:0.50] ; SKX-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [1:1.00] %div = fdiv fast <4 x float> , %x %div2 = fdiv fast <4 x float> %div, %x ret <4 x float> %div2 @@ -619,9 +619,9 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 { ; ; SANDY-LABEL: v4f32_two_step2: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [7:3.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm2 # sched: [5:1.00] -; SANDY-NEXT: vmovaps 
{{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [6:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} xmm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00] @@ -629,26 +629,26 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 { ; SANDY-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v4f32_two_step2: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] +; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] ; HASWELL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50] -; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 +; HASWELL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 +; HASWELL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 +; HASWELL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 +; HASWELL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; HASWELL-NO-FMA-LABEL: v4f32_two_step2: ; HASWELL-NO-FMA: # BB#0: ; HASWELL-NO-FMA-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vmulps %xmm1, 
%xmm0, %xmm2 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm3 # sched: [?:5.000000e-01] +; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %xmm3 # sched: [4:0.50] ; HASWELL-NO-FMA-NEXT: vsubps %xmm2, %xmm3, %xmm2 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vmulps %xmm2, %xmm1, %xmm2 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vaddps %xmm2, %xmm1, %xmm1 # sched: [3:1.00] @@ -656,32 +656,32 @@ define <4 x float> @v4f32_two_step2(<4 x float> %x) #2 { ; HASWELL-NO-FMA-NEXT: vsubps %xmm0, %xmm3, %xmm0 # sched: [3:1.00] ; HASWELL-NO-FMA-NEXT: vmulps %xmm0, %xmm1, %xmm0 # sched: [5:0.50] ; HASWELL-NO-FMA-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00] +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] +; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] ; ; KNL-LABEL: v4f32_two_step2: ; KNL: # BB#0: ; KNL-NEXT: vrcpps %xmm0, %xmm1 # sched: [5:1.00] -; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] +; KNL-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] ; KNL-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50] -; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50] -; KNL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50] -; KNL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50] -; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] -; KNL-NEXT: retq # sched: [2:1.00] +; KNL-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 +; KNL-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 +; KNL-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 +; KNL-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 +; KNL-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] +; KNL-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: v4f32_two_step2: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %xmm0, %xmm1 -; SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [?:5.000000e-01] +; 
SKX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2 # sched: [4:0.50] ; SKX-NEXT: vmovaps %xmm1, %xmm3 # sched: [1:1.00] -; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 # sched: [5:0.50] -; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 # sched: [5:0.50] -; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 # sched: [5:0.50] -; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 # sched: [5:0.50] -; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [5:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfnmadd213ps %xmm2, %xmm0, %xmm3 +; SKX-NEXT: vfmadd132ps %xmm1, %xmm1, %xmm3 +; SKX-NEXT: vfnmadd213ps %xmm2, %xmm3, %xmm0 +; SKX-NEXT: vfmadd132ps %xmm3, %xmm3, %xmm0 +; SKX-NEXT: vmulps {{.*}}(%rip), %xmm0, %xmm0 # sched: [9:0.50] +; SKX-NEXT: retq # sched: [1:1.00] %div = fdiv fast <4 x float> , %x ret <4 x float> %div } @@ -741,49 +741,49 @@ define <8 x float> @v8f32_one_step2(<8 x float> %x) #1 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v8f32_one_step2: ; HASWELL: # BB#0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] +; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] -; HASWELL-NEXT: vfnmadd213ps %ymm2, 
%ymm1, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 +; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 +; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; HASWELL-NO-FMA-LABEL: v8f32_one_step2: ; HASWELL-NO-FMA: # BB#0: -; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50] +; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] +; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00] +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] +; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] ; ; KNL-LABEL: v8f32_one_step2: ; KNL: # BB#0: -; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] +; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] ; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] -; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] -; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50] -; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50] -; KNL-NEXT: retq # sched: [2:1.00] +; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 +; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 +; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] +; KNL-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: 
v8f32_one_step2: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %ymm0, %ymm1 ; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0 -; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50] -; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 +; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] +; SKX-NEXT: retq # sched: [1:1.00] %div = fdiv fast <8 x float> , %x ret <8 x float> %div } @@ -848,54 +848,54 @@ define <8 x float> @v8f32_one_step_2_divs(<8 x float> %x) #1 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [12:2.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v8f32_one_step_2_divs: ; HASWELL: # BB#0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] +; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] -; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [5:0.50] -; HASWELL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: retq 
# sched: [2:1.00] +; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 +; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 +; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00] +; HASWELL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; HASWELL-NO-FMA-LABEL: v8f32_one_step_2_divs: ; HASWELL-NO-FMA: # BB#0: -; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50] +; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] +; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm2, %ymm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00] +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00] +; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] +; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] ; ; KNL-LABEL: v8f32_one_step_2_divs: ; KNL: # BB#0: -; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] +; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] ; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] -; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 # sched: [5:0.50] -; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50] -; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [5:0.50] -; KNL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50] -; KNL-NEXT: retq # sched: [2:1.00] +; KNL-NEXT: vfnmadd213ps %ymm2, %ymm1, %ymm0 +; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 +; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, 
%ymm1 # sched: [9:1.00] +; KNL-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] +; KNL-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: v8f32_one_step_2_divs: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %ymm0, %ymm1 ; SKX-NEXT: vfnmadd213ps {{.*}}(%rip){1to8}, %ymm1, %ymm0 -; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 # sched: [5:0.50] -; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [5:0.50] -; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm0 +; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm1 # sched: [9:1.00] +; SKX-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] +; SKX-NEXT: retq # sched: [1:1.00] %div = fdiv fast <8 x float> , %x %div2 = fdiv fast <8 x float> %div, %x ret <8 x float> %div2 @@ -980,7 +980,7 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 { ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm1 # sched: [5:1.00] ; SANDY-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00] -; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [7:0.50] +; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00] sched: [4:0.50] ; SANDY-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00] @@ -988,59 +988,59 @@ define <8 x float> @v8f32_two_step2(<8 x float> %x) #2 { ; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00] ; SANDY-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; SANDY-NEXT: vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v8f32_two_step2: ; 
HASWELL: # BB#0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] +; HASWELL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] ; HASWELL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; HASWELL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] -; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50] -; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 +; HASWELL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 +; HASWELL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 +; HASWELL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 +; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; HASWELL-NO-FMA-LABEL: v8f32_two_step2: ; HASWELL-NO-FMA: # BB#0: -; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:0.50] +; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] +; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm2 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vbroadcastss {{.*}}(%rip), %ymm3 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vsubps %ymm2, %ymm3, %ymm2 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:0.50] +; HASWELL-NO-FMA-NEXT: vmulps %ymm2, %ymm1, %ymm2 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vaddps %ymm2, %ymm1, %ymm1 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:0.50] +; HASWELL-NO-FMA-NEXT: vmulps %ymm1, %ymm0, %ymm0 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vsubps %ymm0, %ymm3, %ymm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:0.50] +; HASWELL-NO-FMA-NEXT: vmulps %ymm0, %ymm1, %ymm0 # sched: [5:1.00] ; HASWELL-NO-FMA-NEXT: vaddps %ymm0, 
%ymm1, %ymm0 # sched: [3:1.00] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00] +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] +; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] ; ; KNL-LABEL: v8f32_two_step2: ; KNL: # BB#0: -; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [11:2.00] +; KNL-NEXT: vrcpps %ymm0, %ymm1 # sched: [7:2.00] ; KNL-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; KNL-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] -; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50] -; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50] -; KNL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50] -; KNL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50] -; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50] -; KNL-NEXT: retq # sched: [2:1.00] +; KNL-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 +; KNL-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 +; KNL-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 +; KNL-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 +; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] +; KNL-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: v8f32_two_step2: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %ymm0, %ymm1 ; SKX-NEXT: vbroadcastss {{.*}}(%rip), %ymm2 # sched: [5:1.00] ; SKX-NEXT: vmovaps %ymm1, %ymm3 # sched: [1:1.00] -; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 # sched: [5:0.50] -; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 # sched: [5:0.50] -; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 # sched: [5:0.50] -; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 # sched: [5:0.50] -; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vfnmadd213ps %ymm2, %ymm0, %ymm3 +; SKX-NEXT: vfmadd132ps %ymm1, %ymm1, %ymm3 +; SKX-NEXT: vfnmadd213ps %ymm2, %ymm3, %ymm0 +; SKX-NEXT: vfmadd132ps %ymm3, %ymm3, %ymm0 +; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] +; SKX-NEXT: retq # 
sched: [1:1.00] %div = fdiv fast <8 x float> , %x ret <8 x float> %div } @@ -1070,27 +1070,27 @@ define <8 x float> @v8f32_no_step(<8 x float> %x) #3 { ; SANDY-LABEL: v8f32_no_step: ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v8f32_no_step: ; HASWELL: # BB#0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; HASWELL-NO-FMA-LABEL: v8f32_no_step: ; HASWELL-NO-FMA: # BB#0: -; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] -; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00] +; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] +; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] ; ; KNL-LABEL: v8f32_no_step: ; KNL: # BB#0: -; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] -; KNL-NEXT: retq # sched: [2:1.00] +; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] +; KNL-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: v8f32_no_step: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %ymm0, %ymm0 -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: retq # sched: [1:1.00] %div = fdiv fast <8 x float> , %x ret <8 x float> %div } @@ -1125,32 +1125,32 @@ define <8 x float> @v8f32_no_step2(<8 x float> %x) #3 { ; SANDY-LABEL: v8f32_no_step2: ; SANDY: # BB#0: ; SANDY-NEXT: vrcpps %ymm0, %ymm0 # sched: [5:1.00] -; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [12:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: v8f32_no_step2: ; HASWELL: # BB#0: -; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] -; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] +; HASWELL-NEXT: vmulps {{.*}}(%rip), %ymm0, 
%ymm0 # sched: [9:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; HASWELL-NO-FMA-LABEL: v8f32_no_step2: ; HASWELL-NO-FMA: # BB#0: -; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] -; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50] -; HASWELL-NO-FMA-NEXT: retq # sched: [2:1.00] +; HASWELL-NO-FMA-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] +; HASWELL-NO-FMA-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] +; HASWELL-NO-FMA-NEXT: retq # sched: [1:1.00] ; ; KNL-LABEL: v8f32_no_step2: ; KNL: # BB#0: -; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [11:2.00] -; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50] -; KNL-NEXT: retq # sched: [2:1.00] +; KNL-NEXT: vrcpps %ymm0, %ymm0 # sched: [7:2.00] +; KNL-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] +; KNL-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: v8f32_no_step2: ; SKX: # BB#0: ; SKX-NEXT: vrcp14ps %ymm0, %ymm0 -; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [5:0.50] -; SKX-NEXT: retq # sched: [2:1.00] +; SKX-NEXT: vmulps {{.*}}(%rip), %ymm0, %ymm0 # sched: [9:1.00] +; SKX-NEXT: retq # sched: [1:1.00] %div = fdiv fast <8 x float> , %x ret <8 x float> %div } diff --git a/test/CodeGen/X86/sse-schedule.ll b/test/CodeGen/X86/sse-schedule.ll index e94935972c1..52e6b61aedf 100644 --- a/test/CodeGen/X86/sse-schedule.ll +++ b/test/CodeGen/X86/sse-schedule.ll @@ -31,14 +31,14 @@ define <4 x float> @test_addps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SANDY-LABEL: test_addps: ; SANDY: # BB#0: ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_addps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: 
[2:1.00] +; HASWELL-NEXT: vaddps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_addps: ; BTVER2: # BB#0: @@ -73,14 +73,14 @@ define float @test_addss(float %a0, float %a1, float *%a2) { ; SANDY-LABEL: test_addss: ; SANDY: # BB#0: ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_addss: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vaddss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_addss: ; BTVER2: # BB#0: @@ -122,15 +122,15 @@ define <4 x float> @test_andps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; SANDY-LABEL: test_andps: ; SANDY: # BB#0: -; SANDY-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_andps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vandps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vandps (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_andps: ; BTVER2: # BB#0: @@ -176,15 +176,15 @@ define <4 x float> @test_andnotps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; ; SANDY-LABEL: test_andnotps: ; SANDY: # BB#0: -; SANDY-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vandnps (%rdi), %xmm0, %xmm0 # 
sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_andnotps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vandnps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vandnps (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_andnotps: ; BTVER2: # BB#0: @@ -228,16 +228,16 @@ define <4 x float> @test_cmpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SANDY-LABEL: test_cmpps: ; SANDY: # BB#0: ; SANDY-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cmpps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcmpeqps %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; HASWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: vorps %xmm0, %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cmpps: ; BTVER2: # BB#0: @@ -277,13 +277,13 @@ define float @test_cmpss(float %a0, float %a1, float *%a2) { ; SANDY: # BB#0: ; SANDY-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vcmpeqss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cmpss: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcmpeqss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vcmpeqss (%rdi), %xmm0, 
%xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cmpss: ; BTVER2: # BB#0: @@ -347,30 +347,30 @@ define i32 @test_comiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; SANDY-LABEL: test_comiss: ; SANDY: # BB#0: ; SANDY-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: setnp %al # sched: [1:1.00] -; SANDY-NEXT: sete %cl # sched: [1:1.00] +; SANDY-NEXT: setnp %al # sched: [1:0.33] +; SANDY-NEXT: sete %cl # sched: [1:0.33] ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] ; SANDY-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-NEXT: setnp %al # sched: [1:1.00] -; SANDY-NEXT: sete %dl # sched: [1:1.00] +; SANDY-NEXT: setnp %al # sched: [1:0.33] +; SANDY-NEXT: sete %dl # sched: [1:0.33] ; SANDY-NEXT: andb %al, %dl # sched: [1:0.33] ; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33] ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_comiss: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcomiss %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:1.00] -; HASWELL-NEXT: sete %cl # sched: [1:1.00] +; HASWELL-NEXT: setnp %al # sched: [1:0.50] +; HASWELL-NEXT: sete %cl # sched: [1:0.50] ; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25] ; HASWELL-NEXT: vcomiss (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:1.00] -; HASWELL-NEXT: sete %dl # sched: [1:1.00] +; HASWELL-NEXT: setnp %al # sched: [1:0.50] +; HASWELL-NEXT: sete %dl # sched: [1:0.50] ; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25] ; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25] ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_comiss: ; BTVER2: # BB#0: @@ -417,17 +417,17 @@ define float @test_cvtsi2ss(i32 %a0, i32 *%a1) { ; ; SANDY-LABEL: test_cvtsi2ss: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtsi2ssl %edi, 
%xmm0, %xmm0 # sched: [5:2.00] -; SANDY-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [10:1.00] +; SANDY-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00] +; SANDY-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00] ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtsi2ss: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtsi2ssl %edi, %xmm0, %xmm0 # sched: [4:1.00] ; HASWELL-NEXT: vcvtsi2ssl (%rsi), %xmm1, %xmm1 # sched: [8:1.00] ; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvtsi2ss: ; BTVER2: # BB#0: @@ -466,17 +466,17 @@ define float @test_cvtsi2ssq(i64 %a0, i64 *%a1) { ; ; SANDY-LABEL: test_cvtsi2ssq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00] -; SANDY-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [10:1.00] +; SANDY-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [4:1.00] +; SANDY-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00] ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtsi2ssq: ; HASWELL: # BB#0: -; HASWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [5:2.00] +; HASWELL-NEXT: vcvtsi2ssq %rdi, %xmm0, %xmm0 # sched: [4:1.00] ; HASWELL-NEXT: vcvtsi2ssq (%rsi), %xmm1, %xmm1 # sched: [8:1.00] ; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvtsi2ssq: ; BTVER2: # BB#0: @@ -515,17 +515,17 @@ define i32 @test_cvtss2si(float %a0, float *%a1) { ; ; SANDY-LABEL: test_cvtss2si: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtss2si %xmm0, %ecx # sched: [5:1.00] -; SANDY-NEXT: vcvtss2si (%rdi), %eax # sched: [10:1.00] +; SANDY-NEXT: vcvtss2si %xmm0, %ecx # sched: [3:1.00] +; SANDY-NEXT: 
vcvtss2si (%rdi), %eax # sched: [7:1.00] ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtss2si: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtss2si %xmm0, %ecx # sched: [4:1.00] -; HASWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [4:1.00] +; HASWELL-NEXT: vcvtss2si (%rdi), %eax # sched: [8:1.00] ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvtss2si: ; BTVER2: # BB#0: @@ -567,17 +567,17 @@ define i64 @test_cvtss2siq(float %a0, float *%a1) { ; ; SANDY-LABEL: test_cvtss2siq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtss2si %xmm0, %rcx # sched: [5:1.00] -; SANDY-NEXT: vcvtss2si (%rdi), %rax # sched: [10:1.00] +; SANDY-NEXT: vcvtss2si %xmm0, %rcx # sched: [3:1.00] +; SANDY-NEXT: vcvtss2si (%rdi), %rax # sched: [7:1.00] ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtss2siq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtss2si %xmm0, %rcx # sched: [4:1.00] -; HASWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [4:1.00] +; HASWELL-NEXT: vcvtss2si (%rdi), %rax # sched: [8:1.00] ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvtss2siq: ; BTVER2: # BB#0: @@ -619,17 +619,17 @@ define i32 @test_cvttss2si(float %a0, float *%a1) { ; ; SANDY-LABEL: test_cvttss2si: ; SANDY: # BB#0: -; SANDY-NEXT: vcvttss2si %xmm0, %ecx # sched: [5:1.00] -; SANDY-NEXT: vcvttss2si (%rdi), %eax # sched: [10:1.00] +; SANDY-NEXT: vcvttss2si %xmm0, %ecx # sched: [3:1.00] +; SANDY-NEXT: vcvttss2si (%rdi), %eax # sched: [7:1.00] ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvttss2si: ; HASWELL: # BB#0: ; HASWELL-NEXT: 
vcvttss2si %xmm0, %ecx # sched: [4:1.00] -; HASWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [4:1.00] +; HASWELL-NEXT: vcvttss2si (%rdi), %eax # sched: [8:1.00] ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvttss2si: ; BTVER2: # BB#0: @@ -668,17 +668,17 @@ define i64 @test_cvttss2siq(float %a0, float *%a1) { ; ; SANDY-LABEL: test_cvttss2siq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvttss2si %xmm0, %rcx # sched: [5:1.00] -; SANDY-NEXT: vcvttss2si (%rdi), %rax # sched: [10:1.00] +; SANDY-NEXT: vcvttss2si %xmm0, %rcx # sched: [3:1.00] +; SANDY-NEXT: vcvttss2si (%rdi), %rax # sched: [7:1.00] ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvttss2siq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvttss2si %xmm0, %rcx # sched: [4:1.00] -; HASWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [4:1.00] +; HASWELL-NEXT: vcvttss2si (%rdi), %rax # sched: [8:1.00] ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvttss2siq: ; BTVER2: # BB#0: @@ -714,15 +714,15 @@ define <4 x float> @test_divps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; SANDY-LABEL: test_divps: ; SANDY: # BB#0: -; SANDY-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [14:1.00] -; SANDY-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [20:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [12:1.00] +; SANDY-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_divps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [13:1.00] -; HASWELL-NEXT: vdivps (%rdi), %xmm0, %xmm0 # sched: [13:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vdivps %xmm1, %xmm0, %xmm0 # sched: [12:1.00] +; HASWELL-NEXT: 
vdivps (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_divps: ; BTVER2: # BB#0: @@ -756,15 +756,15 @@ define float @test_divss(float %a0, float %a1, float *%a2) { ; ; SANDY-LABEL: test_divss: ; SANDY: # BB#0: -; SANDY-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [14:1.00] -; SANDY-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [20:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [12:1.00] +; SANDY-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_divss: ; HASWELL: # BB#0: -; HASWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [13:1.00] -; HASWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [13:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vdivss %xmm1, %xmm0, %xmm0 # sched: [12:1.00] +; HASWELL-NEXT: vdivss (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_divss: ; BTVER2: # BB#0: @@ -799,14 +799,14 @@ define void @test_ldmxcsr(i32 %a0) { ; SANDY-LABEL: test_ldmxcsr: ; SANDY: # BB#0: ; SANDY-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; SANDY-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [4:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_ldmxcsr: ; HASWELL: # BB#0: ; HASWELL-NEXT: movl %edi, -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; HASWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [2:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vldmxcsr -{{[0-9]+}}(%rsp) # sched: [6:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_ldmxcsr: ; BTVER2: # BB#0: @@ -843,14 +843,14 @@ define <4 x float> @test_maxps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SANDY-LABEL: test_maxps: ; SANDY: # BB#0: ; SANDY-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # 
sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_maxps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmaxps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmaxps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_maxps: ; BTVER2: # BB#0: @@ -886,14 +886,14 @@ define <4 x float> @test_maxss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SANDY-LABEL: test_maxss: ; SANDY: # BB#0: ; SANDY-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_maxss: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmaxss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmaxss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_maxss: ; BTVER2: # BB#0: @@ -929,14 +929,14 @@ define <4 x float> @test_minps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SANDY-LABEL: test_minps: ; SANDY: # BB#0: ; SANDY-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_minps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vminps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vminps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: 
[1:1.00] ; ; BTVER2-LABEL: test_minps: ; BTVER2: # BB#0: @@ -972,14 +972,14 @@ define <4 x float> @test_minss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SANDY-LABEL: test_minss: ; SANDY: # BB#0: ; SANDY-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_minss: ; HASWELL: # BB#0: ; HASWELL-NEXT: vminss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vminss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_minss: ; BTVER2: # BB#0: @@ -1017,17 +1017,17 @@ define void @test_movaps(<4 x float> *%a0, <4 x float> *%a1) { ; ; SANDY-LABEL: test_movaps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovaps (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-NEXT: vmovaps (%rdi), %xmm0 # sched: [4:0.50] ; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movaps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovaps (%rdi), %xmm0 # sched: [4:0.50] ; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [?:1.000000e+00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmovaps %xmm0, (%rsi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movaps: ; BTVER2: # BB#0: @@ -1068,12 +1068,12 @@ define <4 x float> @test_movhlps(<4 x float> %a0, <4 x float> %a1) { ; SANDY-LABEL: test_movhlps: ; SANDY: # BB#0: ; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] 
sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movhlps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm1[1],xmm0[1] sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movhlps: ; BTVER2: # BB#0: @@ -1111,17 +1111,17 @@ define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; ; SANDY-LABEL: test_movhps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movhps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] +; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movhps: ; BTVER2: # BB#0: @@ -1164,13 +1164,13 @@ define <4 x float> @test_movlhps(<4 x float> %a0, <4 x float> %a1) { ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movlhps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] ; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movlhps: ; BTVER2: # BB#0: @@ -1206,17 +1206,17 @@ define 
void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) { ; ; SANDY-LABEL: test_movlps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] +; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [5:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movlps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00] +; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [5:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [?:1.000000e+00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movlps: ; BTVER2: # BB#0: @@ -1254,13 +1254,13 @@ define i32 @test_movmskps(<4 x float> %a0) { ; ; SANDY-LABEL: test_movmskps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovmskps %xmm0, %eax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovmskps %xmm0, %eax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movmskps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmovmskps %xmm0, %eax # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movmskps: ; BTVER2: # BB#0: @@ -1295,13 +1295,13 @@ define void @test_movntps(<4 x float> %a0, <4 x float> *%a1) { ; ; SANDY-LABEL: test_movntps: ; SANDY: # BB#0: -; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movntps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [?:1.000000e+00] -; 
HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmovntps %xmm0, (%rdi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movntps: ; BTVER2: # BB#0: @@ -1335,17 +1335,17 @@ define void @test_movss_mem(float* %a0, float* %a1) { ; ; SANDY-LABEL: test_movss_mem: ; SANDY: # BB#0: -; SANDY-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [4:0.50] ; SANDY-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movss_mem: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero sched: [4:0.50] ; HASWELL-NEXT: vaddss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [?:1.000000e+00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmovss %xmm0, (%rsi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movss_mem: ; BTVER2: # BB#0: @@ -1383,13 +1383,13 @@ define <4 x float> @test_movss_reg(<4 x float> %a0, <4 x float> %a1) { ; ; SANDY-LABEL: test_movss_reg: ; SANDY: # BB#0: -; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movss_reg: ; HASWELL: # BB#0: ; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movss_reg: ; BTVER2: # BB#0: @@ -1423,17 +1423,17 @@ define void @test_movups(<4 x float> *%a0, <4 x float> *%a1) { ; ; SANDY-LABEL: test_movups: ; 
SANDY: # BB#0: -; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-NEXT: vmovups (%rdi), %xmm0 # sched: [4:0.50] ; SANDY-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movups: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovups (%rdi), %xmm0 # sched: [4:0.50] ; HASWELL-NEXT: vaddps %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [?:1.000000e+00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmovups %xmm0, (%rsi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movups: ; BTVER2: # BB#0: @@ -1469,14 +1469,14 @@ define <4 x float> @test_mulps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SANDY-LABEL: test_mulps: ; SANDY: # BB#0: ; SANDY-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_mulps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmulps %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmulps (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_mulps: ; BTVER2: # BB#0: @@ -1511,14 +1511,14 @@ define float @test_mulss(float %a0, float %a1, float *%a2) { ; SANDY-LABEL: test_mulss: ; SANDY: # BB#0: ; SANDY-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # 
sched: [5:1.00] ; ; HASWELL-LABEL: test_mulss: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmulss %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmulss (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_mulss: ; BTVER2: # BB#0: @@ -1560,15 +1560,15 @@ define <4 x float> @test_orps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2 ; ; SANDY-LABEL: test_orps: ; SANDY: # BB#0: -; SANDY-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_orps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vorps (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_orps: ; BTVER2: # BB#0: @@ -1609,13 +1609,13 @@ define void @test_prefetchnta(i8* %a0) { ; ; SANDY-LABEL: test_prefetchnta: ; SANDY: # BB#0: -; SANDY-NEXT: prefetchnta (%rdi) # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: prefetchnta (%rdi) # sched: [4:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_prefetchnta: ; HASWELL: # BB#0: -; HASWELL-NEXT: prefetchnta (%rdi) # sched: [?:5.000000e-01] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: prefetchnta (%rdi) # sched: [4:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_prefetchnta: ; BTVER2: # BB#0: @@ -1652,17 +1652,17 @@ define <4 x float> @test_rcpps(<4 x float> %a0, <4 x float> *%a1) { ; ; SANDY-LABEL: test_rcpps: ; SANDY: # BB#0: -; SANDY-NEXT: vrcpps %xmm0, %xmm0 # sched: [7:3.00] 
-; SANDY-NEXT: vrcpps (%rdi), %xmm1 # sched: [11:1.00] +; SANDY-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: vrcpps (%rdi), %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_rcpps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpps %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [5:1.00] +; HASWELL-NEXT: vrcpps (%rdi), %xmm1 # sched: [9:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_rcpps: ; BTVER2: # BB#0: @@ -1708,18 +1708,18 @@ define <4 x float> @test_rcpss(float %a0, float *%a1) { ; SANDY-LABEL: test_rcpss: ; SANDY: # BB#0: ; SANDY-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] ; SANDY-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_rcpss: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrcpss %xmm0, %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] ; HASWELL-NEXT: vrcpss %xmm1, %xmm1, %xmm1 # sched: [9:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_rcpss: ; BTVER2: # BB#0: @@ -1765,16 +1765,16 @@ define <4 x float> @test_rsqrtps(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-LABEL: test_rsqrtps: ; SANDY: # BB#0: ; SANDY-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vrsqrtps (%rdi), %xmm1 # sched: 
[11:1.00] +; SANDY-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_rsqrtps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrsqrtps %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [5:1.00] +; HASWELL-NEXT: vrsqrtps (%rdi), %xmm1 # sched: [9:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_rsqrtps: ; BTVER2: # BB#0: @@ -1819,19 +1819,19 @@ define <4 x float> @test_rsqrtss(float %a0, float *%a1) { ; ; SANDY-LABEL: test_rsqrtss: ; SANDY: # BB#0: -; SANDY-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] +; SANDY-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] +; SANDY-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [9:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_rsqrtss: ; HASWELL: # BB#0: ; HASWELL-NEXT: vrsqrtss %xmm0, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] ; HASWELL-NEXT: vrsqrtss %xmm1, %xmm1, %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_rsqrtss: ; BTVER2: # BB#0: @@ -1875,12 +1875,12 @@ define void @test_sfence() { ; SANDY-LABEL: test_sfence: ; SANDY: # BB#0: ; SANDY-NEXT: sfence # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; 
SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_sfence: ; HASWELL: # BB#0: -; HASWELL-NEXT: sfence # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: sfence # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_sfence: ; BTVER2: # BB#0: @@ -1917,14 +1917,14 @@ define <4 x float> @test_shufps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; SANDY-LABEL: test_shufps: ; SANDY: # BB#0: ; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] -; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [5:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_shufps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0] sched: [1:1.00] -; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,3],mem[0,0] sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_shufps: ; BTVER2: # BB#0: @@ -1962,17 +1962,17 @@ define <4 x float> @test_sqrtps(<4 x float> %a0, <4 x float> *%a1) { ; ; SANDY-LABEL: test_sqrtps: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00] -; SANDY-NEXT: vsqrtps (%rdi), %xmm1 # sched: [20:1.00] +; SANDY-NEXT: vsqrtps %xmm0, %xmm0 # sched: [15:1.00] +; SANDY-NEXT: vsqrtps (%rdi), %xmm1 # sched: [19:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_sqrtps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [14:1.00] -; HASWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [14:1.00] +; HASWELL-NEXT: vsqrtps %xmm0, %xmm0 # sched: [15:1.00] +; HASWELL-NEXT: vsqrtps (%rdi), %xmm1 # sched: [19:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; 
HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_sqrtps: ; BTVER2: # BB#0: @@ -2017,19 +2017,19 @@ define <4 x float> @test_sqrtss(<4 x float> %a0, <4 x float> *%a1) { ; ; SANDY-LABEL: test_sqrtss: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [114:1.00] -; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [6:0.50] -; SANDY-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [114:1.00] +; SANDY-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [19:1.00] +; SANDY-NEXT: vmovaps (%rdi), %xmm1 # sched: [4:0.50] +; SANDY-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [19:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_sqrtss: ; HASWELL: # BB#0: -; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [14:1.00] -; HASWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [?:5.000000e-01] -; HASWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [14:1.00] +; HASWELL-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 # sched: [19:1.00] +; HASWELL-NEXT: vmovaps (%rdi), %xmm1 # sched: [4:0.50] +; HASWELL-NEXT: vsqrtss %xmm1, %xmm1, %xmm1 # sched: [19:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_sqrtss: ; BTVER2: # BB#0: @@ -2067,15 +2067,15 @@ define i32 @test_stmxcsr() { ; ; SANDY-LABEL: test_stmxcsr: ; SANDY: # BB#0: -; SANDY-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [5:1.00] -; SANDY-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [5:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00] +; SANDY-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [4:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_stmxcsr: ; HASWELL: # BB#0: -; HASWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [1:1.00] -; HASWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [?:5.000000e-01] -; 
HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vstmxcsr -{{[0-9]+}}(%rsp) # sched: [7:1.00] +; HASWELL-NEXT: movl -{{[0-9]+}}(%rsp), %eax # sched: [4:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_stmxcsr: ; BTVER2: # BB#0: @@ -2112,14 +2112,14 @@ define <4 x float> @test_subps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; SANDY-LABEL: test_subps: ; SANDY: # BB#0: ; SANDY-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_subps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_subps: ; BTVER2: # BB#0: @@ -2154,14 +2154,14 @@ define float @test_subss(float %a0, float %a1, float *%a2) { ; SANDY-LABEL: test_subss: ; SANDY: # BB#0: ; SANDY-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_subss: ; HASWELL: # BB#0: ; HASWELL-NEXT: vsubss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vsubss (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_subss: ; BTVER2: # BB#0: @@ -2220,30 +2220,30 @@ define i32 @test_ucomiss(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2) { ; SANDY-LABEL: test_ucomiss: ; SANDY: # BB#0: ; SANDY-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: setnp 
%al # sched: [1:1.00] -; SANDY-NEXT: sete %cl # sched: [1:1.00] +; SANDY-NEXT: setnp %al # sched: [1:0.33] +; SANDY-NEXT: sete %cl # sched: [1:0.33] ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] ; SANDY-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-NEXT: setnp %al # sched: [1:1.00] -; SANDY-NEXT: sete %dl # sched: [1:1.00] +; SANDY-NEXT: setnp %al # sched: [1:0.33] +; SANDY-NEXT: sete %dl # sched: [1:0.33] ; SANDY-NEXT: andb %al, %dl # sched: [1:0.33] ; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33] ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_ucomiss: ; HASWELL: # BB#0: ; HASWELL-NEXT: vucomiss %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:1.00] -; HASWELL-NEXT: sete %cl # sched: [1:1.00] +; HASWELL-NEXT: setnp %al # sched: [1:0.50] +; HASWELL-NEXT: sete %cl # sched: [1:0.50] ; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25] ; HASWELL-NEXT: vucomiss (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:1.00] -; HASWELL-NEXT: sete %dl # sched: [1:1.00] +; HASWELL-NEXT: setnp %al # sched: [1:0.50] +; HASWELL-NEXT: sete %dl # sched: [1:0.50] ; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25] ; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25] ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_ucomiss: ; BTVER2: # BB#0: @@ -2292,14 +2292,14 @@ define <4 x float> @test_unpckhps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SANDY-LABEL: test_unpckhps: ; SANDY: # BB#0: ; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: 
test_unpckhps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_unpckhps: ; BTVER2: # BB#0: @@ -2338,14 +2338,14 @@ define <4 x float> @test_unpcklps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SANDY-LABEL: test_unpcklps: ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [5:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_unpcklps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_unpcklps: ; BTVER2: # BB#0: @@ -2387,15 +2387,15 @@ define <4 x float> @test_xorps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a ; ; SANDY-LABEL: test_xorps: ; SANDY: # BB#0: -; SANDY-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_xorps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vxorps %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 
# sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vxorps (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_xorps: ; BTVER2: # BB#0: diff --git a/test/CodeGen/X86/sse2-schedule.ll b/test/CodeGen/X86/sse2-schedule.ll index 29782d8c01a..14c155c8c6c 100644 --- a/test/CodeGen/X86/sse2-schedule.ll +++ b/test/CodeGen/X86/sse2-schedule.ll @@ -31,14 +31,14 @@ define <2 x double> @test_addpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-LABEL: test_addpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_addpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vaddpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_addpd: ; BTVER2: # BB#0: @@ -73,14 +73,14 @@ define double @test_addsd(double %a0, double %a1, double *%a2) { ; SANDY-LABEL: test_addsd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_addsd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vaddsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_addsd: ; BTVER2: # BB#0: @@ -117,17 +117,17 @@ define <2 x double> @test_andpd(<2 x double> %a0, 
<2 x double> %a1, <2 x double> ; ; SANDY-LABEL: test_andpd: ; SANDY: # BB#0: -; SANDY-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_andpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vandpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: vandpd (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_andpd: ; BTVER2: # BB#0: @@ -170,17 +170,17 @@ define <2 x double> @test_andnotpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; ; SANDY-LABEL: test_andnotpd: ; SANDY: # BB#0: -; SANDY-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_andnotpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vandnpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: vandnpd (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_andnotpd: ; BTVER2: # BB#0: @@ -226,16 +226,16 @@ define <2 x double> @test_cmppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-LABEL: test_cmppd: ; SANDY: # BB#0: ; 
SANDY-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cmppd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; HASWELL-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; HASWELL-NEXT: vorpd %xmm0, %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cmppd: ; BTVER2: # BB#0: @@ -275,13 +275,13 @@ define double @test_cmpsd(double %a0, double %a1, double *%a2) { ; SANDY: # BB#0: ; SANDY-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cmpsd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: vcmpeqsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cmpsd: ; BTVER2: # BB#0: @@ -345,30 +345,30 @@ define i32 @test_comisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; SANDY-LABEL: test_comisd: ; SANDY: # BB#0: ; SANDY-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: setnp %al # sched: [1:1.00] -; SANDY-NEXT: sete %cl # sched: [1:1.00] +; SANDY-NEXT: setnp %al # sched: [1:0.33] +; SANDY-NEXT: sete %cl # sched: [1:0.33] ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] ; SANDY-NEXT: vcomisd (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-NEXT: setnp %al # sched: [1:1.00] -; SANDY-NEXT: sete %dl # sched: [1:1.00] +; SANDY-NEXT: setnp %al 
# sched: [1:0.33] +; SANDY-NEXT: sete %dl # sched: [1:0.33] ; SANDY-NEXT: andb %al, %dl # sched: [1:0.33] ; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33] ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_comisd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcomisd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:1.00] -; HASWELL-NEXT: sete %cl # sched: [1:1.00] +; HASWELL-NEXT: setnp %al # sched: [1:0.50] +; HASWELL-NEXT: sete %cl # sched: [1:0.50] ; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25] ; HASWELL-NEXT: vcomisd (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:1.00] -; HASWELL-NEXT: sete %dl # sched: [1:1.00] +; HASWELL-NEXT: setnp %al # sched: [1:0.50] +; HASWELL-NEXT: sete %dl # sched: [1:0.50] ; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25] ; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25] ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_comisd: ; BTVER2: # BB#0: @@ -416,16 +416,16 @@ define <2 x double> @test_cvtdq2pd(<4 x i32> %a0, <4 x i32> *%a1) { ; SANDY-LABEL: test_cvtdq2pd: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [10:1.00] +; SANDY-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [8:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtdq2pd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtdq2pd %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [4:1.00] +; HASWELL-NEXT: vcvtdq2pd (%rdi), %xmm1 # sched: [8:1.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvtdq2pd: ; BTVER2: # BB#0: @@ -467,17 +467,17 
@@ define <4 x float> @test_cvtdq2ps(<4 x i32> %a0, <4 x i32> *%a1) { ; ; SANDY-LABEL: test_cvtdq2ps: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [9:1.00] +; SANDY-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:1.00] +; SANDY-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtdq2ps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [3:1.00] +; HASWELL-NEXT: vcvtdq2ps %xmm0, %xmm0 # sched: [4:1.00] +; HASWELL-NEXT: vcvtdq2ps (%rdi), %xmm1 # sched: [8:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvtdq2ps: ; BTVER2: # BB#0: @@ -517,17 +517,17 @@ define <4 x i32> @test_cvtpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; ; SANDY-LABEL: test_cvtpd2dq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [10:1.00] +; SANDY-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [7:1.00] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtpd2dq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [7:1.00] +; HASWELL-NEXT: vcvtpd2dqx (%rdi), %xmm1 # sched: [8:1.00] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvtpd2dq: ; BTVER2: # BB#0: @@ -568,17 +568,17 @@ define <4 x float> @test_cvtpd2ps(<2 x double> %a0, <2 x double> *%a1) { ; ; SANDY-LABEL: 
test_cvtpd2ps: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [10:1.00] +; SANDY-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [7:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtpd2ps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtpd2ps %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [7:1.00] +; HASWELL-NEXT: vcvtpd2psx (%rdi), %xmm1 # sched: [8:1.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvtpd2ps: ; BTVER2: # BB#0: @@ -620,16 +620,16 @@ define <4 x i32> @test_cvtps2dq(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-LABEL: test_cvtps2dq: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [9:1.00] +; SANDY-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [7:1.00] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtps2dq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtps2dq %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [3:1.00] +; HASWELL-NEXT: vcvtps2dq (%rdi), %xmm1 # sched: [7:1.00] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvtps2dq: ; BTVER2: # BB#0: @@ -670,17 +670,17 @@ define <2 x double> @test_cvtps2pd(<4 x float> %a0, <4 x float> *%a1) { ; ; SANDY-LABEL: test_cvtps2pd: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] +; SANDY-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [7:1.00] ; 
SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtps2pd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtps2pd %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [1:1.00] +; HASWELL-NEXT: vcvtps2pd (%rdi), %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvtps2pd: ; BTVER2: # BB#0: @@ -724,14 +724,14 @@ define i32 @test_cvtsd2si(double %a0, double *%a1) { ; SANDY-NEXT: vcvtsd2si %xmm0, %ecx # sched: [3:1.00] ; SANDY-NEXT: vcvtsd2si (%rdi), %eax # sched: [7:1.00] ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtsd2si: ; HASWELL: # BB#0: -; HASWELL-NEXT: vcvtsd2si %xmm0, %ecx # sched: [3:1.00] -; HASWELL-NEXT: vcvtsd2si (%rdi), %eax # sched: [7:1.00] +; HASWELL-NEXT: vcvtsd2si %xmm0, %ecx # sched: [4:1.00] +; HASWELL-NEXT: vcvtsd2si (%rdi), %eax # sched: [8:1.00] ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvtsd2si: ; BTVER2: # BB#0: @@ -773,17 +773,17 @@ define i64 @test_cvtsd2siq(double %a0, double *%a1) { ; ; SANDY-LABEL: test_cvtsd2siq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtsd2si %xmm0, %rcx # sched: [5:1.00] -; SANDY-NEXT: vcvtsd2si (%rdi), %rax # sched: [10:1.00] +; SANDY-NEXT: vcvtsd2si %xmm0, %rcx # sched: [3:1.00] +; SANDY-NEXT: vcvtsd2si (%rdi), %rax # sched: [7:1.00] ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtsd2siq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtsd2si %xmm0, %rcx # sched: [4:1.00] -; HASWELL-NEXT: vcvtsd2si (%rdi), %rax # sched: [4:1.00] +; HASWELL-NEXT: vcvtsd2si 
(%rdi), %rax # sched: [8:1.00] ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvtsd2siq: ; BTVER2: # BB#0: @@ -830,18 +830,18 @@ define float @test_cvtsd2ss(double %a0, double *%a1) { ; SANDY-LABEL: test_cvtsd2ss: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] +; SANDY-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [4:0.50] ; SANDY-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [3:1.00] ; SANDY-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtsd2ss: ; HASWELL: # BB#0: -; HASWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [?:5.000000e-01] -; HASWELL-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [3:1.00] +; HASWELL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 # sched: [4:1.00] +; HASWELL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero sched: [4:0.50] +; HASWELL-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 # sched: [4:1.00] ; HASWELL-NEXT: vaddss %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvtsd2ss: ; BTVER2: # BB#0: @@ -882,16 +882,16 @@ define double @test_cvtsi2sd(i32 %a0, i32 *%a1) { ; SANDY-LABEL: test_cvtsi2sd: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [9:1.00] +; SANDY-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: [8:1.00] ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtsi2sd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # sched: [4:1.00] ; HASWELL-NEXT: vcvtsi2sdl (%rsi), %xmm1, %xmm1 # sched: 
[8:1.00] ; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvtsi2sd: ; BTVER2: # BB#0: @@ -931,16 +931,16 @@ define double @test_cvtsi2sdq(i64 %a0, i64 *%a1) { ; SANDY-LABEL: test_cvtsi2sdq: ; SANDY: # BB#0: ; SANDY-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [9:1.00] +; SANDY-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [8:1.00] ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtsi2sdq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtsi2sdq %rdi, %xmm0, %xmm0 # sched: [4:1.00] ; HASWELL-NEXT: vcvtsi2sdq (%rsi), %xmm1, %xmm1 # sched: [8:1.00] ; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvtsi2sdq: ; BTVER2: # BB#0: @@ -985,19 +985,19 @@ define double @test_cvtss2sd(float %a0, float *%a1) { ; ; SANDY-LABEL: test_cvtss2sd: ; SANDY: # BB#0: -; SANDY-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [6:0.50] -; SANDY-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [1:1.00] +; SANDY-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] +; SANDY-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 # sched: [3:1.00] ; SANDY-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvtss2sd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero sched: [4:0.50] ; HASWELL-NEXT: vcvtss2sd 
%xmm1, %xmm1, %xmm1 # sched: [2:1.00] ; HASWELL-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvtss2sd: ; BTVER2: # BB#0: @@ -1038,17 +1038,17 @@ define <4 x i32> @test_cvttpd2dq(<2 x double> %a0, <2 x double> *%a1) { ; ; SANDY-LABEL: test_cvttpd2dq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; SANDY-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [10:1.00] +; SANDY-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [7:1.00] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvttpd2dq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvttpd2dq %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [7:1.00] +; HASWELL-NEXT: vcvttpd2dqx (%rdi), %xmm1 # sched: [8:1.00] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvttpd2dq: ; BTVER2: # BB#0: @@ -1091,16 +1091,16 @@ define <4 x i32> @test_cvttps2dq(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-LABEL: test_cvttps2dq: ; SANDY: # BB#0: ; SANDY-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [9:1.00] +; SANDY-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [7:1.00] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvttps2dq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvttps2dq %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [3:1.00] +; HASWELL-NEXT: vcvttps2dq (%rdi), %xmm1 # sched: [7:1.00] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; 
BTVER2-LABEL: test_cvttps2dq: ; BTVER2: # BB#0: @@ -1139,17 +1139,17 @@ define i32 @test_cvttsd2si(double %a0, double *%a1) { ; ; SANDY-LABEL: test_cvttsd2si: ; SANDY: # BB#0: -; SANDY-NEXT: vcvttsd2si %xmm0, %ecx # sched: [5:1.00] +; SANDY-NEXT: vcvttsd2si %xmm0, %ecx # sched: [3:1.00] ; SANDY-NEXT: vcvttsd2si (%rdi), %eax # sched: [7:1.00] ; SANDY-NEXT: addl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvttsd2si: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvttsd2si %xmm0, %ecx # sched: [4:1.00] -; HASWELL-NEXT: vcvttsd2si (%rdi), %eax # sched: [7:1.00] +; HASWELL-NEXT: vcvttsd2si (%rdi), %eax # sched: [8:1.00] ; HASWELL-NEXT: addl %ecx, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvttsd2si: ; BTVER2: # BB#0: @@ -1188,17 +1188,17 @@ define i64 @test_cvttsd2siq(double %a0, double *%a1) { ; ; SANDY-LABEL: test_cvttsd2siq: ; SANDY: # BB#0: -; SANDY-NEXT: vcvttsd2si %xmm0, %rcx # sched: [5:1.00] -; SANDY-NEXT: vcvttsd2si (%rdi), %rax # sched: [10:1.00] +; SANDY-NEXT: vcvttsd2si %xmm0, %rcx # sched: [3:1.00] +; SANDY-NEXT: vcvttsd2si (%rdi), %rax # sched: [7:1.00] ; SANDY-NEXT: addq %rcx, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_cvttsd2siq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vcvttsd2si %xmm0, %rcx # sched: [4:1.00] -; HASWELL-NEXT: vcvttsd2si (%rdi), %rax # sched: [4:1.00] +; HASWELL-NEXT: vcvttsd2si (%rdi), %rax # sched: [8:1.00] ; HASWELL-NEXT: addq %rcx, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_cvttsd2siq: ; BTVER2: # BB#0: @@ -1234,15 +1234,15 @@ define <2 x double> @test_divpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SANDY-LABEL: test_divpd: ; SANDY: # BB#0: -; SANDY-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [22:1.00] -; SANDY-NEXT: 
vdivpd (%rdi), %xmm0, %xmm0 # sched: [28:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [12:1.00] +; SANDY-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_divpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [19:4.00] -; HASWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [19:4.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # sched: [12:1.00] +; HASWELL-NEXT: vdivpd (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_divpd: ; BTVER2: # BB#0: @@ -1276,15 +1276,15 @@ define double @test_divsd(double %a0, double %a1, double *%a2) { ; ; SANDY-LABEL: test_divsd: ; SANDY: # BB#0: -; SANDY-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [22:1.00] -; SANDY-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [28:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [12:1.00] +; SANDY-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_divsd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [19:4.00] -; HASWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [19:4.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # sched: [12:1.00] +; HASWELL-NEXT: vdivsd (%rdi), %xmm0, %xmm0 # sched: [16:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_divsd: ; BTVER2: # BB#0: @@ -1322,12 +1322,12 @@ define void @test_lfence() { ; SANDY-LABEL: test_lfence: ; SANDY: # BB#0: ; SANDY-NEXT: lfence # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_lfence: ; HASWELL: # BB#0: -; HASWELL-NEXT: lfence # sched: [2:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: lfence # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; 
BTVER2-LABEL: test_lfence: ; BTVER2: # BB#0: @@ -1363,12 +1363,12 @@ define void @test_mfence() { ; SANDY-LABEL: test_mfence: ; SANDY: # BB#0: ; SANDY-NEXT: mfence # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_mfence: ; HASWELL: # BB#0: -; HASWELL-NEXT: mfence # sched: [2:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: mfence # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_mfence: ; BTVER2: # BB#0: @@ -1402,12 +1402,12 @@ define void @test_maskmovdqu(<16 x i8> %a0, <16 x i8> %a1, i8* %a2) { ; SANDY-LABEL: test_maskmovdqu: ; SANDY: # BB#0: ; SANDY-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_maskmovdqu: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmaskmovdqu %xmm1, %xmm0 # sched: [14:2.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_maskmovdqu: ; BTVER2: # BB#0: @@ -1440,14 +1440,14 @@ define <2 x double> @test_maxpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-LABEL: test_maxpd: ; SANDY: # BB#0: ; SANDY-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_maxpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmaxpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_maxpd: ; BTVER2: # BB#0: @@ -1483,14 +1483,14 @@ define <2 x double> @test_maxsd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-LABEL: 
test_maxsd: ; SANDY: # BB#0: ; SANDY-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_maxsd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_maxsd: ; BTVER2: # BB#0: @@ -1526,14 +1526,14 @@ define <2 x double> @test_minpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-LABEL: test_minpd: ; SANDY: # BB#0: ; SANDY-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_minpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vminpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vminpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_minpd: ; BTVER2: # BB#0: @@ -1569,14 +1569,14 @@ define <2 x double> @test_minsd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-LABEL: test_minsd: ; SANDY: # BB#0: ; SANDY-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_minsd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vminsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: 
[3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vminsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_minsd: ; BTVER2: # BB#0: @@ -1614,17 +1614,17 @@ define void @test_movapd(<2 x double> *%a0, <2 x double> *%a1) { ; ; SANDY-LABEL: test_movapd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovapd (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-NEXT: vmovapd (%rdi), %xmm0 # sched: [4:0.50] ; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovapd %xmm0, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movapd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovapd (%rdi), %xmm0 # sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovapd (%rdi), %xmm0 # sched: [4:0.50] ; HASWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [?:1.000000e+00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmovapd %xmm0, (%rsi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movapd: ; BTVER2: # BB#0: @@ -1662,17 +1662,17 @@ define void @test_movdqa(<2 x i64> *%a0, <2 x i64> *%a1) { ; ; SANDY-LABEL: test_movdqa: ; SANDY: # BB#0: -; SANDY-NEXT: vmovdqa (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-NEXT: vmovdqa (%rdi), %xmm0 # sched: [4:0.50] ; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovdqa %xmm0, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movdqa: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovdqa (%rdi), %xmm0 # sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovdqa (%rdi), %xmm0 # sched: [4:0.50] ; HASWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [?:1.000000e+00] -; HASWELL-NEXT: retq # sched: [2:1.00] 
+; HASWELL-NEXT: vmovdqa %xmm0, (%rsi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movdqa: ; BTVER2: # BB#0: @@ -1710,17 +1710,17 @@ define void @test_movdqu(<2 x i64> *%a0, <2 x i64> *%a1) { ; ; SANDY-LABEL: test_movdqu: ; SANDY: # BB#0: -; SANDY-NEXT: vmovdqu (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-NEXT: vmovdqu (%rdi), %xmm0 # sched: [4:0.50] ; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovdqu %xmm0, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movdqu: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovdqu (%rdi), %xmm0 # sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovdqu (%rdi), %xmm0 # sched: [4:0.50] ; HASWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [?:1.000000e+00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmovdqu %xmm0, (%rsi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movdqu: ; BTVER2: # BB#0: @@ -1768,22 +1768,22 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; SANDY-LABEL: test_movd: ; SANDY: # BB#0: ; SANDY-NEXT: vmovd %edi, %xmm1 # sched: [1:0.33] -; SANDY-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [6:0.50] +; SANDY-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] ; SANDY-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] -; SANDY-NEXT: vmovd %xmm1, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovd %xmm0, %eax # sched: [1:0.33] +; SANDY-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovd %edi, %xmm1 # sched: [1:0.25] -; HASWELL-NEXT: vmovd {{.*#+}} xmm2 = 
mem[0],zero,zero,zero sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovd %edi, %xmm1 # sched: [1:1.00] +; HASWELL-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [4:0.50] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] ; HASWELL-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vmovd %xmm0, %eax # sched: [1:1.00] -; HASWELL-NEXT: vmovd %xmm1, (%rsi) # sched: [?:1.000000e+00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmovd %xmm1, (%rsi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movd: ; BTVER2: # BB#0: @@ -1838,23 +1838,23 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) { ; ; SANDY-LABEL: test_movd_64: ; SANDY: # BB#0: -; SANDY-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] -; SANDY-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [6:0.50] +; SANDY-NEXT: vmovq %rdi, %xmm1 # sched: [1:0.33] +; SANDY-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [4:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] ; SANDY-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] -; SANDY-NEXT: vmovq %xmm1, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovq %xmm0, %rax # sched: [1:0.33] +; SANDY-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movd_64: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmovq %rdi, %xmm1 # sched: [1:1.00] -; HASWELL-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [4:0.50] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] ; HASWELL-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] ; HASWELL-NEXT: vmovq %xmm0, %rax # sched: [1:1.00] -; HASWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [?:1.000000e+00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmovq %xmm1, (%rsi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: 
test_movd_64: ; BTVER2: # BB#0: @@ -1900,17 +1900,17 @@ define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; ; SANDY-LABEL: test_movhpd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] +; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovhpd %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movhpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] +; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [?:1.000000e+00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movhpd: ; BTVER2: # BB#0: @@ -1951,17 +1951,17 @@ define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) { ; ; SANDY-LABEL: test_movlpd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00] +; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [5:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovlpd %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movlpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00] +; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [5:1.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [?:1.000000e+00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmovlpd %xmm0, 
(%rdi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movlpd: ; BTVER2: # BB#0: @@ -1998,13 +1998,13 @@ define i32 @test_movmskpd(<2 x double> %a0) { ; ; SANDY-LABEL: test_movmskpd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovmskpd %xmm0, %eax # sched: [2:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovmskpd %xmm0, %eax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movmskpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmovmskpd %xmm0, %eax # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movmskpd: ; BTVER2: # BB#0: @@ -2039,14 +2039,14 @@ define void @test_movntdqa(<2 x i64> %a0, <2 x i64> *%a1) { ; SANDY-LABEL: test_movntdqa: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovntdq %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movntdqa: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddq %xmm0, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [?:1.000000e+00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmovntdq %xmm0, (%rdi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movntdqa: ; BTVER2: # BB#0: @@ -2080,14 +2080,14 @@ define void @test_movntpd(<2 x double> %a0, <2 x double> *%a1) { ; SANDY-LABEL: test_movntpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovntpd %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movntpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [?:1.000000e+00] -; HASWELL-NEXT: retq # sched: 
[2:1.00] +; HASWELL-NEXT: vmovntpd %xmm0, (%rdi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movntpd: ; BTVER2: # BB#0: @@ -2123,17 +2123,17 @@ define <2 x i64> @test_movq_mem(<2 x i64> %a0, i64 *%a1) { ; ; SANDY-LABEL: test_movq_mem: ; SANDY: # BB#0: -; SANDY-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [6:0.50] +; SANDY-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [4:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vmovq %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movq_mem: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero sched: [4:0.50] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vmovq %xmm0, (%rdi) # sched: [?:1.000000e+00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmovq %xmm0, (%rdi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movq_mem: ; BTVER2: # BB#0: @@ -2174,13 +2174,13 @@ define <2 x i64> @test_movq_reg(<2 x i64> %a0, <2 x i64> %a1) { ; SANDY: # BB#0: ; SANDY-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] ; SANDY-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movq_reg: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero sched: [1:0.33] ; HASWELL-NEXT: vpaddq %xmm0, %xmm1, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movq_reg: ; BTVER2: # BB#0: @@ -2216,17 +2216,17 @@ define void @test_movsd_mem(double* %a0, double* %a1) { ; ; SANDY-LABEL: test_movsd_mem: ; SANDY: # BB#0: -; SANDY-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [6:0.50] +; SANDY-NEXT: vmovsd 
{{.*#+}} xmm0 = mem[0],zero sched: [4:0.50] ; SANDY-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovsd %xmm0, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movsd_mem: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero sched: [4:0.50] ; HASWELL-NEXT: vaddsd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [?:1.000000e+00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmovsd %xmm0, (%rsi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movsd_mem: ; BTVER2: # BB#0: @@ -2266,12 +2266,12 @@ define <2 x double> @test_movsd_reg(<2 x double> %a0, <2 x double> %a1) { ; SANDY-LABEL: test_movsd_reg: ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movsd_reg: ; HASWELL: # BB#0: ; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movsd_reg: ; BTVER2: # BB#0: @@ -2305,17 +2305,17 @@ define void @test_movupd(<2 x double> *%a0, <2 x double> *%a1) { ; ; SANDY-LABEL: test_movupd: ; SANDY: # BB#0: -; SANDY-NEXT: vmovupd (%rdi), %xmm0 # sched: [6:0.50] +; SANDY-NEXT: vmovupd (%rdi), %xmm0 # sched: [4:0.50] ; SANDY-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movupd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovupd (%rdi), %xmm0 # sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovupd (%rdi), %xmm0 # 
sched: [4:0.50] ; HASWELL-NEXT: vaddpd %xmm0, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [?:1.000000e+00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmovupd %xmm0, (%rsi) # sched: [1:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movupd: ; BTVER2: # BB#0: @@ -2351,14 +2351,14 @@ define <2 x double> @test_mulpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-LABEL: test_mulpd: ; SANDY: # BB#0: ; SANDY-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_mulpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmulpd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_mulpd: ; BTVER2: # BB#0: @@ -2393,14 +2393,14 @@ define double @test_mulsd(double %a0, double %a1, double *%a2) { ; SANDY-LABEL: test_mulsd: ; SANDY: # BB#0: ; SANDY-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_mulsd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmulsd (%rdi), %xmm0, %xmm0 # sched: [9:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_mulsd: ; BTVER2: # BB#0: @@ -2437,17 +2437,17 @@ define <2 x double> @test_orpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SANDY-LABEL: test_orpd: ; SANDY: # 
BB#0: -; SANDY-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_orpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: vorpd (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_orpd: ; BTVER2: # BB#0: @@ -2496,14 +2496,14 @@ define <8 x i16> @test_packssdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_packssdw: ; SANDY: # BB#0: ; SANDY-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_packssdw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpackssdw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_packssdw: ; BTVER2: # BB#0: @@ -2548,14 +2548,14 @@ define <16 x i8> @test_packsswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_packsswb: ; SANDY: # BB#0: ; SANDY-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # 
sched: [5:1.00] ; ; HASWELL-LABEL: test_packsswb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpacksswb (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_packsswb: ; BTVER2: # BB#0: @@ -2600,14 +2600,14 @@ define <16 x i8> @test_packuswb(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_packuswb: ; SANDY: # BB#0: ; SANDY-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_packuswb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpackuswb (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_packuswb: ; BTVER2: # BB#0: @@ -2648,14 +2648,14 @@ define <16 x i8> @test_paddb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_paddb: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_paddb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpaddb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_paddb: ; BTVER2: # BB#0: @@ -2694,14 +2694,14 @@ define <4 x 
i32> @test_paddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_paddd: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_paddd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpaddd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_paddd: ; BTVER2: # BB#0: @@ -2736,14 +2736,14 @@ define <2 x i64> @test_paddq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-LABEL: test_paddq: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_paddq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpaddq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_paddq: ; BTVER2: # BB#0: @@ -2782,14 +2782,14 @@ define <16 x i8> @test_paddsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_paddsb: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_paddsb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 
# sched: [1:0.50] -; HASWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_paddsb: ; BTVER2: # BB#0: @@ -2829,14 +2829,14 @@ define <8 x i16> @test_paddsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_paddsw: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_paddsw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_paddsw: ; BTVER2: # BB#0: @@ -2876,14 +2876,14 @@ define <16 x i8> @test_paddusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_paddusb: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_paddusb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpaddusb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_paddusb: ; BTVER2: # BB#0: @@ -2923,14 +2923,14 @@ define <8 x i16> @test_paddusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_paddusw: ; SANDY: # BB#0: ; 
SANDY-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_paddusw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpaddusw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_paddusw: ; BTVER2: # BB#0: @@ -2970,14 +2970,14 @@ define <8 x i16> @test_paddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_paddw: ; SANDY: # BB#0: ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_paddw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpaddw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_paddw: ; BTVER2: # BB#0: @@ -3015,16 +3015,16 @@ define <2 x i64> @test_pand(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-LABEL: test_pand: ; SANDY: # BB#0: ; SANDY-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pand: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpand %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: vpand (%rdi), 
%xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: vpand (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pand: ; BTVER2: # BB#0: @@ -3070,16 +3070,16 @@ define <2 x i64> @test_pandn(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-LABEL: test_pandn: ; SANDY: # BB#0: ; SANDY-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [7:0.50] +; SANDY-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pandn: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpandn %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [1:0.50] +; HASWELL-NEXT: vpandn (%rdi), %xmm0, %xmm1 # sched: [5:0.50] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pandn: ; BTVER2: # BB#0: @@ -3122,14 +3122,14 @@ define <16 x i8> @test_pavgb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_pavgb: ; SANDY: # BB#0: ; SANDY-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pavgb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpavgb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pavgb: ; BTVER2: # BB#0: @@ -3169,14 +3169,14 @@ define <8 x i16> @test_pavgw(<8 x i16> 
%a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_pavgw: ; SANDY: # BB#0: ; SANDY-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pavgw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpavgw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pavgw: ; BTVER2: # BB#0: @@ -3217,16 +3217,16 @@ define <16 x i8> @test_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_pcmpeqb: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pcmpeqb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pcmpeqb: ; BTVER2: # BB#0: @@ -3269,16 +3269,16 @@ define <4 x i32> @test_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_pcmpeqd: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] 
-; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pcmpeqd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pcmpeqd: ; BTVER2: # BB#0: @@ -3321,16 +3321,16 @@ define <8 x i16> @test_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_pcmpeqw: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pcmpeqw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pcmpeqw: ; BTVER2: # BB#0: @@ -3374,16 +3374,16 @@ define <16 x i8> @test_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_pcmpgtb: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pcmpgtb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; 
HASWELL-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pcmpgtb: ; BTVER2: # BB#0: @@ -3427,16 +3427,16 @@ define <4 x i32> @test_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_pcmpgtd: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pcmpgtd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pcmpgtd: ; BTVER2: # BB#0: @@ -3480,16 +3480,16 @@ define <8 x i16> @test_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_pcmpgtw: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; SANDY-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pcmpgtw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vpor %xmm0, %xmm1, %xmm0 # sched: [1:0.33] -; 
HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pcmpgtw: ; BTVER2: # BB#0: @@ -3526,15 +3526,15 @@ define i16 @test_pextrw(<8 x i16> %a0) { ; ; SANDY-LABEL: test_pextrw: ; SANDY: # BB#0: -; SANDY-NEXT: vpextrw $6, %xmm0, %eax # sched: [3:1.00] +; SANDY-NEXT: vpextrw $6, %xmm0, %eax # sched: [1:0.50] ; SANDY-NEXT: # kill: %AX %AX %EAX -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pextrw: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpextrw $6, %xmm0, %eax # sched: [2:1.00] +; HASWELL-NEXT: vpextrw $6, %xmm0, %eax # sched: [1:1.00] ; HASWELL-NEXT: # kill: %AX %AX %EAX -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pextrw: ; BTVER2: # BB#0: @@ -3570,15 +3570,15 @@ define <8 x i16> @test_pinsrw(<8 x i16> %a0, i16 %a1, i16 *%a2) { ; ; SANDY-LABEL: test_pinsrw: ; SANDY: # BB#0: -; SANDY-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pinsrw: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; HASWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: vpinsrw $3, (%rsi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pinsrw: ; BTVER2: # BB#0: @@ -3620,15 +3620,15 @@ define <4 x i32> @test_pmaddwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_pmaddwd: ; SANDY: # BB#0: -; SANDY-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; 
SANDY-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmaddwd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpmaddwd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmaddwd: ; BTVER2: # BB#0: @@ -3669,14 +3669,14 @@ define <8 x i16> @test_pmaxsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_pmaxsw: ; SANDY: # BB#0: ; SANDY-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmaxsw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpmaxsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmaxsw: ; BTVER2: # BB#0: @@ -3716,14 +3716,14 @@ define <16 x i8> @test_pmaxub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_pmaxub: ; SANDY: # BB#0: ; SANDY-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmaxub: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpmaxub (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # 
sched: [1:1.00] ; ; BTVER2-LABEL: test_pmaxub: ; BTVER2: # BB#0: @@ -3763,14 +3763,14 @@ define <8 x i16> @test_pminsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_pminsw: ; SANDY: # BB#0: ; SANDY-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pminsw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpminsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pminsw: ; BTVER2: # BB#0: @@ -3810,14 +3810,14 @@ define <16 x i8> @test_pminub(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_pminub: ; SANDY: # BB#0: ; SANDY-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pminub: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpminub %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpminub (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pminub: ; BTVER2: # BB#0: @@ -3851,13 +3851,13 @@ define i32 @test_pmovmskb(<16 x i8> %a0) { ; ; SANDY-LABEL: test_pmovmskb: ; SANDY: # BB#0: -; SANDY-NEXT: vpmovmskb %xmm0, %eax # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpmovmskb %xmm0, %eax # sched: [1:0.33] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmovmskb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmovmskb 
%xmm0, %eax # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmovmskb: ; BTVER2: # BB#0: @@ -3891,13 +3891,13 @@ define <8 x i16> @test_pmulhuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY: # BB#0: ; SANDY-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmulhuw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpmulhuw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmulhuw: ; BTVER2: # BB#0: @@ -3932,15 +3932,15 @@ define <8 x i16> @test_pmulhw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_pmulhw: ; SANDY: # BB#0: -; SANDY-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmulhw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpmulhw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmulhw: ; BTVER2: # BB#0: @@ -3975,15 +3975,15 @@ define <8 x i16> @test_pmullw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_pmullw: ; SANDY: # BB#0: -; SANDY-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: 
[1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmullw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpmullw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmullw: ; BTVER2: # BB#0: @@ -4027,13 +4027,13 @@ define <2 x i64> @test_pmuludq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY: # BB#0: ; SANDY-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmuludq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpmuludq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmuludq: ; BTVER2: # BB#0: @@ -4073,16 +4073,16 @@ define <2 x i64> @test_por(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-LABEL: test_por: ; SANDY: # BB#0: ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_por: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: vpor (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_por: ; BTVER2: # BB#0: @@ -4126,15 +4126,15 
@@ define <2 x i64> @test_psadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; ; SANDY-LABEL: test_psadbw: ; SANDY: # BB#0: -; SANDY-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psadbw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpsadbw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_psadbw: ; BTVER2: # BB#0: @@ -4176,16 +4176,16 @@ define <4 x i32> @test_pshufd(<4 x i32> %a0, <4 x i32> *%a1) { ; SANDY-LABEL: test_pshufd: ; SANDY: # BB#0: ; SANDY-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:0.50] -; SANDY-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [7:0.50] +; SANDY-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [5:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pshufd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,3,2] sched: [1:1.00] -; HASWELL-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [1:1.00] +; HASWELL-NEXT: vpshufd {{.*#+}} xmm1 = mem[3,2,1,0] sched: [5:1.00] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pshufd: ; BTVER2: # BB#0: @@ -4227,16 +4227,16 @@ define <8 x i16> @test_pshufhw(<8 x i16> %a0, <8 x i16> *%a1) { ; SANDY-LABEL: test_pshufhw: ; SANDY: # BB#0: ; SANDY-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:0.50] -; SANDY-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [7:0.50] +; SANDY-NEXT: vpshufhw {{.*#+}} 
xmm1 = mem[0,1,2,3,7,6,5,4] sched: [5:0.50] ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pshufhw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,4,7,6] sched: [1:1.00] -; HASWELL-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [1:1.00] +; HASWELL-NEXT: vpshufhw {{.*#+}} xmm1 = mem[0,1,2,3,7,6,5,4] sched: [5:1.00] ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pshufhw: ; BTVER2: # BB#0: @@ -4278,16 +4278,16 @@ define <8 x i16> @test_pshuflw(<8 x i16> %a0, <8 x i16> *%a1) { ; SANDY-LABEL: test_pshuflw: ; SANDY: # BB#0: ; SANDY-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:0.50] -; SANDY-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [7:0.50] +; SANDY-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [5:0.50] ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pshuflw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,0,3,2,4,5,6,7] sched: [1:1.00] -; HASWELL-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [1:1.00] +; HASWELL-NEXT: vpshuflw {{.*#+}} xmm1 = mem[3,2,1,0,4,5,6,7] sched: [5:1.00] ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pshuflw: ; BTVER2: # BB#0: @@ -4328,15 +4328,15 @@ define <4 x i32> @test_pslld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY: # BB#0: ; SANDY-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpslld $2, 
%xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pslld: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: vpslld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; HASWELL-NEXT: vpslld (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpslld $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pslld: ; BTVER2: # BB#0: @@ -4378,12 +4378,12 @@ define <4 x i32> @test_pslldq(<4 x i32> %a0) { ; SANDY-LABEL: test_pslldq: ; SANDY: # BB#0: ; SANDY-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pslldq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11] sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pslldq: ; BTVER2: # BB#0: @@ -4419,15 +4419,15 @@ define <2 x i64> @test_psllq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY: # BB#0: ; SANDY-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psllq: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; HASWELL-NEXT: vpsllq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpsllq $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: 
[1:1.00] ; ; BTVER2-LABEL: test_psllq: ; BTVER2: # BB#0: @@ -4470,15 +4470,15 @@ define <8 x i16> @test_psllw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY: # BB#0: ; SANDY-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] ; SANDY-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] -; SANDY-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psllw: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [4:1.00] -; HASWELL-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; HASWELL-NEXT: vpsllw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpsllw $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_psllw: ; BTVER2: # BB#0: @@ -4519,17 +4519,17 @@ define <4 x i32> @test_psrad(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SANDY-LABEL: test_psrad: ; SANDY: # BB#0: -; SANDY-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psrad: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; HASWELL-NEXT: vpsrad (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpsrad $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_psrad: ; BTVER2: # BB#0: @@ -4570,17 +4570,17 @@ 
define <8 x i16> @test_psraw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_psraw: ; SANDY: # BB#0: -; SANDY-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psraw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; HASWELL-NEXT: vpsraw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpsraw $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_psraw: ; BTVER2: # BB#0: @@ -4621,17 +4621,17 @@ define <4 x i32> @test_psrld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SANDY-LABEL: test_psrld: ; SANDY: # BB#0: -; SANDY-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psrld: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; HASWELL-NEXT: vpsrld (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpsrld $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_psrld: ; BTVER2: # BB#0: @@ -4673,12 
+4673,12 @@ define <4 x i32> @test_psrldq(<4 x i32> %a0) { ; SANDY-LABEL: test_psrldq: ; SANDY: # BB#0: ; SANDY-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psrldq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_psrldq: ; BTVER2: # BB#0: @@ -4712,17 +4712,17 @@ define <2 x i64> @test_psrlq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; ; SANDY-LABEL: test_psrlq: ; SANDY: # BB#0: -; SANDY-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psrlq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; HASWELL-NEXT: vpsrlq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpsrlq $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_psrlq: ; BTVER2: # BB#0: @@ -4763,17 +4763,17 @@ define <8 x i16> @test_psrlw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_psrlw: ; SANDY: # BB#0: -; SANDY-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: 
[1:0.50] +; SANDY-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psrlw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [2:1.00] +; HASWELL-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpsrlw $2, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_psrlw: ; BTVER2: # BB#0: @@ -4816,14 +4816,14 @@ define <16 x i8> @test_psubb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_psubb: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psubb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpsubb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_psubb: ; BTVER2: # BB#0: @@ -4862,14 +4862,14 @@ define <4 x i32> @test_psubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_psubd: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psubd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpsubd 
(%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_psubd: ; BTVER2: # BB#0: @@ -4904,14 +4904,14 @@ define <2 x i64> @test_psubq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-LABEL: test_psubq: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psubq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpsubq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_psubq: ; BTVER2: # BB#0: @@ -4950,14 +4950,14 @@ define <16 x i8> @test_psubsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_psubsb: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psubsb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_psubsb: ; BTVER2: # BB#0: @@ -4997,14 +4997,14 @@ define <8 x i16> @test_psubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_psubsw: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: 
vpsubsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psubsw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_psubsw: ; BTVER2: # BB#0: @@ -5044,14 +5044,14 @@ define <16 x i8> @test_psubusb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_psubusb: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psubusb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpsubusb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_psubusb: ; BTVER2: # BB#0: @@ -5091,14 +5091,14 @@ define <8 x i16> @test_psubusw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_psubusw: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psubusw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpsubusw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: 
test_psubusw: ; BTVER2: # BB#0: @@ -5138,14 +5138,14 @@ define <8 x i16> @test_psubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_psubw: ; SANDY: # BB#0: ; SANDY-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psubw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpsubw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_psubw: ; BTVER2: # BB#0: @@ -5184,14 +5184,14 @@ define <16 x i8> @test_punpckhbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_punpckhbw: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:0.50] -; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_punpckhbw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] sched: [1:1.00] -; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = 
xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15] sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_punpckhbw: ; BTVER2: # BB#0: @@ -5231,16 +5231,16 @@ define <4 x i32> @test_punpckhdq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_punpckhdq: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [7:0.50] +; SANDY-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [5:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_punpckhdq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; HASWELL-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [1:1.00] +; HASWELL-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm1[2],mem[2],xmm1[3],mem[3] sched: [5:1.00] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_punpckhdq: ; BTVER2: # BB#0: @@ -5280,16 +5280,16 @@ define <2 x i64> @test_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; SANDY-LABEL: test_punpckhqdq: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:0.50] -; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:0.50] +; SANDY-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # 
sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_punpckhqdq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; HASWELL-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00] +; HASWELL-NEXT: vpunpckhqdq {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [5:1.00] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_punpckhqdq: ; BTVER2: # BB#0: @@ -5330,14 +5330,14 @@ define <8 x i16> @test_punpckhwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_punpckhwd: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:0.50] -; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_punpckhwd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_punpckhwd: ; BTVER2: # BB#0: @@ -5376,14 +5376,14 @@ define <16 x i8> @test_punpcklbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_punpcklbw: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: 
[1:0.50] -; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_punpcklbw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] sched: [1:1.00] -; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7] sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_punpcklbw: ; BTVER2: # BB#0: @@ -5423,16 +5423,16 @@ define <4 x i32> @test_punpckldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_punpckldq: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:0.50] -; SANDY-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [7:0.50] +; SANDY-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [5:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_punpckldq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] sched: [1:1.00] -; HASWELL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [1:1.00] +; HASWELL-NEXT: vpunpckldq {{.*#+}} 
xmm1 = xmm1[0],mem[0],xmm1[1],mem[1] sched: [5:1.00] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_punpckldq: ; BTVER2: # BB#0: @@ -5472,16 +5472,16 @@ define <2 x i64> @test_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) ; SANDY-LABEL: test_punpcklqdq: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] -; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:0.50] +; SANDY-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_punpcklqdq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; HASWELL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] +; HASWELL-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [5:1.00] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_punpcklqdq: ; BTVER2: # BB#0: @@ -5522,14 +5522,14 @@ define <8 x i16> @test_punpcklwd(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_punpcklwd: ; SANDY: # BB#0: ; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:0.50] -; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_punpcklwd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] sched: [1:1.00] -; 
HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3] sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_punpcklwd: ; BTVER2: # BB#0: @@ -5567,16 +5567,16 @@ define <2 x i64> @test_pxor(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-LABEL: test_pxor: ; SANDY: # BB#0: ; SANDY-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [7:0.50] +; SANDY-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pxor: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpxor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [1:0.50] +; HASWELL-NEXT: vpxor (%rdi), %xmm0, %xmm0 # sched: [5:0.50] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pxor: ; BTVER2: # BB#0: @@ -5616,16 +5616,16 @@ define <2 x double> @test_shufpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; SANDY-LABEL: test_shufpd: ; SANDY: # BB#0: ; SANDY-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; SANDY-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [7:1.00] +; SANDY-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [5:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_shufpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] sched: [1:1.00] -; HASWELL-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: [1:1.00] +; HASWELL-NEXT: vshufpd {{.*#+}} xmm1 = xmm1[1],mem[0] sched: 
[5:1.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_shufpd: ; BTVER2: # BB#0: @@ -5665,17 +5665,17 @@ define <2 x double> @test_sqrtpd(<2 x double> %a0, <2 x double> *%a1) { ; ; SANDY-LABEL: test_sqrtpd: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [22:1.00] -; SANDY-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [28:1.00] +; SANDY-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [15:1.00] +; SANDY-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [19:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_sqrtpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [21:1.00] -; HASWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [21:1.00] +; HASWELL-NEXT: vsqrtpd %xmm0, %xmm0 # sched: [15:1.00] +; HASWELL-NEXT: vsqrtpd (%rdi), %xmm1 # sched: [19:1.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_sqrtpd: ; BTVER2: # BB#0: @@ -5720,19 +5720,19 @@ define <2 x double> @test_sqrtsd(<2 x double> %a0, <2 x double> *%a1) { ; ; SANDY-LABEL: test_sqrtsd: ; SANDY: # BB#0: -; SANDY-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00] -; SANDY-NEXT: vmovapd (%rdi), %xmm1 # sched: [6:0.50] -; SANDY-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [21:1.00] +; SANDY-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [19:1.00] +; SANDY-NEXT: vmovapd (%rdi), %xmm1 # sched: [4:0.50] +; SANDY-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [19:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_sqrtsd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [21:1.00] -; HASWELL-NEXT: vmovapd (%rdi), %xmm1 # sched: [?:5.000000e-01] -; 
HASWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [21:1.00] +; HASWELL-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # sched: [19:1.00] +; HASWELL-NEXT: vmovapd (%rdi), %xmm1 # sched: [4:0.50] +; HASWELL-NEXT: vsqrtsd %xmm1, %xmm1, %xmm1 # sched: [19:1.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_sqrtsd: ; BTVER2: # BB#0: @@ -5771,14 +5771,14 @@ define <2 x double> @test_subpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; SANDY-LABEL: test_subpd: ; SANDY: # BB#0: ; SANDY-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_subpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_subpd: ; BTVER2: # BB#0: @@ -5813,14 +5813,14 @@ define double @test_subsd(double %a0, double %a1, double *%a2) { ; SANDY-LABEL: test_subsd: ; SANDY: # BB#0: ; SANDY-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_subsd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vsubsd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_subsd: ; BTVER2: # BB#0: @@ -5879,30 +5879,30 @@ define i32 
@test_ucomisd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) ; SANDY-LABEL: test_ucomisd: ; SANDY: # BB#0: ; SANDY-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: setnp %al # sched: [1:1.00] -; SANDY-NEXT: sete %cl # sched: [1:1.00] +; SANDY-NEXT: setnp %al # sched: [1:0.33] +; SANDY-NEXT: sete %cl # sched: [1:0.33] ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] ; SANDY-NEXT: vucomisd (%rdi), %xmm0 # sched: [7:1.00] -; SANDY-NEXT: setnp %al # sched: [1:1.00] -; SANDY-NEXT: sete %dl # sched: [1:1.00] +; SANDY-NEXT: setnp %al # sched: [1:0.33] +; SANDY-NEXT: sete %dl # sched: [1:0.33] ; SANDY-NEXT: andb %al, %dl # sched: [1:0.33] ; SANDY-NEXT: orb %cl, %dl # sched: [1:0.33] ; SANDY-NEXT: movzbl %dl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_ucomisd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vucomisd %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:1.00] -; HASWELL-NEXT: sete %cl # sched: [1:1.00] +; HASWELL-NEXT: setnp %al # sched: [1:0.50] +; HASWELL-NEXT: sete %cl # sched: [1:0.50] ; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25] ; HASWELL-NEXT: vucomisd (%rdi), %xmm0 # sched: [7:1.00] -; HASWELL-NEXT: setnp %al # sched: [1:1.00] -; HASWELL-NEXT: sete %dl # sched: [1:1.00] +; HASWELL-NEXT: setnp %al # sched: [1:0.50] +; HASWELL-NEXT: sete %dl # sched: [1:0.50] ; HASWELL-NEXT: andb %al, %dl # sched: [1:0.25] ; HASWELL-NEXT: orb %cl, %dl # sched: [1:0.25] ; HASWELL-NEXT: movzbl %dl, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_ucomisd: ; BTVER2: # BB#0: @@ -5950,16 +5950,16 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SANDY-LABEL: test_unpckhpd: ; SANDY: # BB#0: ; SANDY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; SANDY-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [7:1.00] +; SANDY-NEXT: vunpckhpd 
{{.*#+}} xmm1 = xmm1[1],mem[1] sched: [5:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_unpckhpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] sched: [1:1.00] -; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [1:1.00] +; HASWELL-NEXT: vunpckhpd {{.*#+}} xmm1 = xmm1[1],mem[1] sched: [5:1.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_unpckhpd: ; BTVER2: # BB#0: @@ -6005,16 +6005,16 @@ define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SANDY-LABEL: test_unpcklpd: ; SANDY: # BB#0: ; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SANDY-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [7:1.00] +; SANDY-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [5:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_unpcklpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [1:1.00] +; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [5:1.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_unpcklpd: ; BTVER2: # BB#0: @@ -6053,17 +6053,17 @@ define <2 x double> @test_xorpd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SANDY-LABEL: test_xorpd: ; SANDY: # BB#0: -; SANDY-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; SANDY-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # 
sched: [5:0.50] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_xorpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vxorpd %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: vxorpd (%rdi), %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_xorpd: ; BTVER2: # BB#0: diff --git a/test/CodeGen/X86/sse3-schedule.ll b/test/CodeGen/X86/sse3-schedule.ll index 672ae838a04..482b2fcab64 100644 --- a/test/CodeGen/X86/sse3-schedule.ll +++ b/test/CodeGen/X86/sse3-schedule.ll @@ -31,14 +31,14 @@ define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; SANDY-LABEL: test_addsubpd: ; SANDY: # BB#0: ; SANDY-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_addsubpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_addsubpd: ; BTVER2: # BB#0: @@ -74,14 +74,14 @@ define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; SANDY-LABEL: test_addsubps: ; SANDY: # BB#0: ; SANDY-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; 
HASWELL-LABEL: test_addsubps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_addsubps: ; BTVER2: # BB#0: @@ -116,15 +116,15 @@ define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; ; SANDY-LABEL: test_haddpd: ; SANDY: # BB#0: -; SANDY-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; SANDY-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_haddpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; HASWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_haddpd: ; BTVER2: # BB#0: @@ -159,15 +159,15 @@ define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; ; SANDY-LABEL: test_haddps: ; SANDY: # BB#0: -; SANDY-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; SANDY-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [11:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_haddps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; HASWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [5:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [9:2.00] +; HASWELL-NEXT: retq # 
sched: [1:1.00] ; ; BTVER2-LABEL: test_haddps: ; BTVER2: # BB#0: @@ -202,15 +202,15 @@ define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double ; ; SANDY-LABEL: test_hsubpd: ; SANDY: # BB#0: -; SANDY-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; SANDY-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [11:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_hsubpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; HASWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [5:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [9:2.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_hsubpd: ; BTVER2: # BB#0: @@ -245,15 +245,15 @@ define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *% ; ; SANDY-LABEL: test_hsubps: ; SANDY: # BB#0: -; SANDY-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; SANDY-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [11:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_hsubps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [5:2.00] -; HASWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [5:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [9:2.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_hsubps: ; BTVER2: # BB#0: @@ -287,13 +287,13 @@ define <16 x i8> @test_lddqu(i8* %a0) { ; ; SANDY-LABEL: test_lddqu: ; SANDY: # BB#0: -; SANDY-NEXT: vlddqu (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vlddqu (%rdi), %xmm0 # sched: 
[4:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_lddqu: ; HASWELL: # BB#0: -; HASWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [?:5.000000e-01] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vlddqu (%rdi), %xmm0 # sched: [4:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_lddqu: ; BTVER2: # BB#0: @@ -330,16 +330,16 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) { ; SANDY-LABEL: test_movddup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] -; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [6:0.50] +; SANDY-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [4:0.50] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movddup: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:1.00] -; HASWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [4:0.50] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movddup: ; BTVER2: # BB#0: @@ -380,16 +380,16 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-LABEL: test_movshdup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] -; SANDY-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [6:0.50] +; SANDY-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [4:0.50] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movshdup: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:1.00] -; HASWELL-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovshdup {{.*#+}} xmm1 
= mem[1,1,3,3] sched: [4:0.50] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movshdup: ; BTVER2: # BB#0: @@ -430,16 +430,16 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-LABEL: test_movsldup: ; SANDY: # BB#0: ; SANDY-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] -; SANDY-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [6:0.50] +; SANDY-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:0.50] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movsldup: ; HASWELL: # BB#0: ; HASWELL-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:1.00] -; HASWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [?:5.000000e-01] +; HASWELL-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [4:0.50] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movsldup: ; BTVER2: # BB#0: diff --git a/test/CodeGen/X86/sse41-schedule.ll b/test/CodeGen/X86/sse41-schedule.ll index b30d8164157..340b9abe887 100644 --- a/test/CodeGen/X86/sse41-schedule.ll +++ b/test/CodeGen/X86/sse41-schedule.ll @@ -25,17 +25,17 @@ define <2 x double> @test_blendpd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; ; SANDY-LABEL: test_blendpd: ; SANDY: # BB#0: -; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:1.00] +; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.50] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_blendpd: ; HASWELL: # 
BB#0: ; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1] sched: [1:0.33] ; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],mem[1] sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_blendpd: ; BTVER2: # BB#0: @@ -65,15 +65,15 @@ define <4 x float> @test_blendps(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; ; SANDY-LABEL: test_blendps: ; SANDY: # BB#0: -; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:1.00] -; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.50] +; SANDY-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_blendps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2],xmm0[3] sched: [1:0.33] -; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],mem[1],xmm0[2,3] sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_blendps: ; BTVER2: # BB#0: @@ -107,15 +107,15 @@ define <2 x double> @test_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x doub ; ; SANDY-LABEL: test_blendvpd: ; SANDY: # BB#0: -; SANDY-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; SANDY-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; SANDY-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_blendvpd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vblendvpd %xmm2, 
%xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; HASWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vblendvpd %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:2.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_blendvpd: ; BTVER2: # BB#0: @@ -150,15 +150,15 @@ define <4 x float> @test_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> ; ; SANDY-LABEL: test_blendvps: ; SANDY: # BB#0: -; SANDY-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; SANDY-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:2.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] +; SANDY-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_blendvps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; HASWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vblendvps %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:2.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_blendvps: ; BTVER2: # BB#0: @@ -187,15 +187,15 @@ define <2 x double> @test_dppd(<2 x double> %a0, <2 x double> %a1, <2 x double> ; ; SANDY-LABEL: test_dppd: ; SANDY: # BB#0: -; SANDY-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [15:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_dppd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vdppd $7, %xmm1, %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vdppd $7, (%rdi), %xmm0, %xmm0 # sched: [13:1.00] +; 
HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_dppd: ; BTVER2: # BB#0: @@ -224,15 +224,15 @@ define <4 x float> @test_dpps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%a2 ; ; SANDY-LABEL: test_dpps: ; SANDY: # BB#0: -; SANDY-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [12:2.00] +; SANDY-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_dpps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vdpps $7, %xmm1, %xmm0, %xmm0 # sched: [14:2.00] -; HASWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [14:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vdpps $7, (%rdi), %xmm0, %xmm0 # sched: [18:2.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_dpps: ; BTVER2: # BB#0: @@ -262,14 +262,14 @@ define <4 x float> @test_insertps(<4 x float> %a0, <4 x float> %a1, float *%a2) ; SANDY-LABEL: test_insertps: ; SANDY: # BB#0: ; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] -; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [7:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [5:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_insertps: ; HASWELL: # BB#0: ; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm1[0],xmm0[2,3] sched: [1:1.00] -; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0] sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_insertps: ; BTVER2: # BB#0: @@ -296,13 +296,13 @@ define <2 x i64> @test_movntdqa(i8* %a0) { ; ; SANDY-LABEL: test_movntdqa: ; SANDY: # BB#0: -; SANDY-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [6:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmovntdqa (%rdi), %xmm0 # 
sched: [4:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_movntdqa: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [?:5.000000e-01] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmovntdqa (%rdi), %xmm0 # sched: [4:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_movntdqa: ; BTVER2: # BB#0: @@ -328,15 +328,15 @@ define <8 x i16> @test_mpsadbw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; ; SANDY-LABEL: test_mpsadbw: ; SANDY: # BB#0: -; SANDY-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [6:1.00] +; SANDY-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_mpsadbw: ; HASWELL: # BB#0: -; HASWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [7:2.00] -; HASWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [7:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vmpsadbw $7, %xmm1, %xmm0, %xmm0 # sched: [6:2.00] +; HASWELL-NEXT: vmpsadbw $7, (%rdi), %xmm0, %xmm0 # sched: [6:2.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_mpsadbw: ; BTVER2: # BB#0: @@ -367,14 +367,14 @@ define <8 x i16> @test_packusdw(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_packusdw: ; SANDY: # BB#0: ; SANDY-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_packusdw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpackusdw (%rdi), %xmm0, %xmm0 # 
sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_packusdw: ; BTVER2: # BB#0: @@ -411,14 +411,14 @@ define <16 x i8> @test_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2, <16 ; SANDY-LABEL: test_pblendvb: ; SANDY: # BB#0: ; SANDY-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [8:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pblendvb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 # sched: [2:2.00] -; HASWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [2:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpblendvb %xmm2, (%rdi), %xmm0, %xmm0 # sched: [6:2.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pblendvb: ; BTVER2: # BB#0: @@ -448,14 +448,14 @@ define <8 x i16> @test_pblendw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_pblendw: ; SANDY: # BB#0: ; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:0.50] -; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pblendw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] sched: [1:1.00] -; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],mem[2,3],xmm0[4,5,6],mem[7] sched: [4:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pblendw: ; BTVER2: # BB#0: @@ -484,14 +484,14 @@ define 
<2 x i64> @test_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; SANDY-LABEL: test_pcmpeqq: ; SANDY: # BB#0: ; SANDY-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pcmpeqq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pcmpeqq: ; BTVER2: # BB#0: @@ -521,15 +521,15 @@ define i32 @test_pextrb(<16 x i8> %a0, i8 *%a1) { ; ; SANDY-LABEL: test_pextrb: ; SANDY: # BB#0: -; SANDY-NEXT: vpextrb $3, %xmm0, %eax # sched: [3:1.00] +; SANDY-NEXT: vpextrb $3, %xmm0, %eax # sched: [1:0.50] ; SANDY-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pextrb: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [2:1.00] -; HASWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpextrb $3, %xmm0, %eax # sched: [1:1.00] +; HASWELL-NEXT: vpextrb $1, %xmm0, (%rdi) # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pextrb: ; BTVER2: # BB#0: @@ -558,15 +558,15 @@ define i32 @test_pextrd(<4 x i32> %a0, i32 *%a1) { ; ; SANDY-LABEL: test_pextrd: ; SANDY: # BB#0: -; SANDY-NEXT: vpextrd $3, %xmm0, %eax # sched: [3:1.00] +; SANDY-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:0.50] ; SANDY-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pextrd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpextrd $3, %xmm0, 
%eax # sched: [2:1.00] -; HASWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpextrd $3, %xmm0, %eax # sched: [1:1.00] +; HASWELL-NEXT: vpextrd $1, %xmm0, (%rdi) # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pextrd: ; BTVER2: # BB#0: @@ -594,15 +594,15 @@ define i64 @test_pextrq(<2 x i64> %a0, <2 x i64> %a1, i64 *%a2) { ; ; SANDY-LABEL: test_pextrq: ; SANDY: # BB#0: -; SANDY-NEXT: vpextrq $1, %xmm0, %rax # sched: [3:1.00] +; SANDY-NEXT: vpextrq $1, %xmm0, %rax # sched: [1:0.50] ; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pextrq: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [2:1.00] -; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpextrq $1, %xmm0, %rax # sched: [1:1.00] +; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pextrq: ; BTVER2: # BB#0: @@ -630,15 +630,15 @@ define i32 @test_pextrw(<8 x i16> %a0, i16 *%a1) { ; ; SANDY-LABEL: test_pextrw: ; SANDY: # BB#0: -; SANDY-NEXT: vpextrw $3, %xmm0, %eax # sched: [3:1.00] +; SANDY-NEXT: vpextrw $3, %xmm0, %eax # sched: [1:0.50] ; SANDY-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pextrw: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [2:1.00] -; HASWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpextrw $3, %xmm0, %eax # sched: [1:1.00] +; HASWELL-NEXT: vpextrw $1, %xmm0, (%rdi) # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pextrw: ; BTVER2: # BB#0: @@ -667,15 +667,15 @@ define <8 x i16> @test_phminposuw(<8 x i16> *%a0) { ; 
; SANDY-LABEL: test_phminposuw: ; SANDY: # BB#0: -; SANDY-NEXT: vphminposuw (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-NEXT: vphminposuw (%rdi), %xmm0 # sched: [9:1.00] ; SANDY-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_phminposuw: ; HASWELL: # BB#0: -; HASWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: vphminposuw (%rdi), %xmm0 # sched: [9:1.00] ; HASWELL-NEXT: vphminposuw %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_phminposuw: ; BTVER2: # BB#0: @@ -704,15 +704,15 @@ define <16 x i8> @test_pinsrb(<16 x i8> %a0, i8 %a1, i8 *%a2) { ; ; SANDY-LABEL: test_pinsrb: ; SANDY: # BB#0: -; SANDY-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pinsrb: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; HASWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpinsrb $1, %edi, %xmm0, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: vpinsrb $3, (%rsi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pinsrb: ; BTVER2: # BB#0: @@ -740,15 +740,15 @@ define <4 x i32> @test_pinsrd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; ; SANDY-LABEL: test_pinsrd: ; SANDY: # BB#0: -; SANDY-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpinsrd $3, (%rsi), %xmm0, 
%xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pinsrd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [2:2.00] -; HASWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: vpinsrd $3, (%rsi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pinsrd: ; BTVER2: # BB#0: @@ -778,17 +778,17 @@ define <2 x i64> @test_pinsrq(<2 x i64> %a0, <2 x i64> %a1, i64 %a2, i64 *%a3) { ; ; SANDY-LABEL: test_pinsrq: ; SANDY: # BB#0: -; SANDY-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [7:0.50] +; SANDY-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pinsrq: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [2:2.00] -; HASWELL-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [1:1.00] +; HASWELL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm0 # sched: [1:1.00] +; HASWELL-NEXT: vpinsrq $1, (%rsi), %xmm1, %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pinsrq: ; BTVER2: # BB#0: @@ -819,14 +819,14 @@ define <16 x i8> @test_pmaxsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_pmaxsb: ; SANDY: # BB#0: ; SANDY-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: 
test_pmaxsb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmaxsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpmaxsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmaxsb: ; BTVER2: # BB#0: @@ -856,14 +856,14 @@ define <4 x i32> @test_pmaxsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_pmaxsd: ; SANDY: # BB#0: ; SANDY-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmaxsd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpmaxsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmaxsd: ; BTVER2: # BB#0: @@ -893,14 +893,14 @@ define <4 x i32> @test_pmaxud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_pmaxud: ; SANDY: # BB#0: ; SANDY-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmaxud: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmaxud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpmaxud (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmaxud: ; BTVER2: # BB#0: @@ -930,14 +930,14 @@ define <8 x i16> @test_pmaxuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) 
{ ; SANDY-LABEL: test_pmaxuw: ; SANDY: # BB#0: ; SANDY-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmaxuw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmaxuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpmaxuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmaxuw: ; BTVER2: # BB#0: @@ -967,14 +967,14 @@ define <16 x i8> @test_pminsb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_pminsb: ; SANDY: # BB#0: ; SANDY-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pminsb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpminsb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpminsb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pminsb: ; BTVER2: # BB#0: @@ -1004,14 +1004,14 @@ define <4 x i32> @test_pminsd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_pminsd: ; SANDY: # BB#0: ; SANDY-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pminsd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpminsd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminsd 
(%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpminsd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pminsd: ; BTVER2: # BB#0: @@ -1041,14 +1041,14 @@ define <4 x i32> @test_pminud(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_pminud: ; SANDY: # BB#0: ; SANDY-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pminud: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpminud %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpminud (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pminud: ; BTVER2: # BB#0: @@ -1078,14 +1078,14 @@ define <8 x i16> @test_pminuw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_pminuw: ; SANDY: # BB#0: ; SANDY-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pminuw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpminuw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpminuw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pminuw: ; BTVER2: # BB#0: @@ -1118,16 +1118,16 @@ define <8 x i16> @test_pmovsxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; SANDY-LABEL: test_pmovsxbw: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: 
vpmovsxbw (%rdi), %xmm1 # sched: [7:0.50] +; SANDY-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmovsxbw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmovsxbw %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [1:1.00] +; HASWELL-NEXT: vpmovsxbw (%rdi), %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmovsxbw: ; BTVER2: # BB#0: @@ -1162,16 +1162,16 @@ define <4 x i32> @test_pmovsxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; SANDY-LABEL: test_pmovsxbd: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [7:0.50] +; SANDY-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmovsxbd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmovsxbd %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [1:1.00] +; HASWELL-NEXT: vpmovsxbd (%rdi), %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmovsxbd: ; BTVER2: # BB#0: @@ -1206,16 +1206,16 @@ define <2 x i64> @test_pmovsxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; SANDY-LABEL: test_pmovsxbq: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [7:0.50] +; SANDY-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmovsxbq: ; HASWELL: # 
BB#0: ; HASWELL-NEXT: vpmovsxbq %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [1:1.00] +; HASWELL-NEXT: vpmovsxbq (%rdi), %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmovsxbq: ; BTVER2: # BB#0: @@ -1250,16 +1250,16 @@ define <2 x i64> @test_pmovsxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; SANDY-LABEL: test_pmovsxdq: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [7:0.50] +; SANDY-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmovsxdq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmovsxdq %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [1:1.00] +; HASWELL-NEXT: vpmovsxdq (%rdi), %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmovsxdq: ; BTVER2: # BB#0: @@ -1294,16 +1294,16 @@ define <4 x i32> @test_pmovsxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; SANDY-LABEL: test_pmovsxwd: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [7:0.50] +; SANDY-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmovsxwd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmovsxwd %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [1:1.00] +; HASWELL-NEXT: vpmovsxwd (%rdi), %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: 
[2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmovsxwd: ; BTVER2: # BB#0: @@ -1338,16 +1338,16 @@ define <2 x i64> @test_pmovsxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; SANDY-LABEL: test_pmovsxwq: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [7:0.50] +; SANDY-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmovsxwq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmovsxwq %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [1:1.00] +; HASWELL-NEXT: vpmovsxwq (%rdi), %xmm1 # sched: [5:1.00] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmovsxwq: ; BTVER2: # BB#0: @@ -1382,16 +1382,16 @@ define <8 x i16> @test_pmovzxbw(<16 x i8> %a0, <8 x i8> *%a1) { ; SANDY-LABEL: test_pmovzxbw: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [7:0.50] +; SANDY-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:0.50] ; SANDY-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmovzxbw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] -; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm1 = 
mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [1:1.00] +; HASWELL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00] ; HASWELL-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmovzxbw: ; BTVER2: # BB#0: @@ -1426,16 +1426,16 @@ define <4 x i32> @test_pmovzxbd(<16 x i8> %a0, <4 x i8> *%a1) { ; SANDY-LABEL: test_pmovzxbd: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [7:0.50] +; SANDY-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmovzxbd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero sched: [1:1.00] -; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [1:1.00] +; HASWELL-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:1.00] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmovzxbd: ; BTVER2: # BB#0: @@ -1470,16 +1470,16 @@ define <2 x i64> @test_pmovzxbq(<16 x i8> %a0, <2 x i8> *%a1) { ; SANDY-LABEL: test_pmovzxbq: ; SANDY: # BB#0: ; 
SANDY-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [7:0.50] +; SANDY-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmovzxbq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] -; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [1:1.00] +; HASWELL-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmovzxbq: ; BTVER2: # BB#0: @@ -1514,16 +1514,16 @@ define <2 x i64> @test_pmovzxdq(<4 x i32> %a0, <2 x i32> *%a1) { ; SANDY-LABEL: test_pmovzxdq: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [7:0.50] +; SANDY-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmovzxdq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero sched: [1:1.00] -; HASWELL-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [1:1.00] +; HASWELL-NEXT: vpmovzxdq 
{{.*#+}} xmm1 = mem[0],zero,mem[1],zero sched: [5:1.00] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmovzxdq: ; BTVER2: # BB#0: @@ -1558,16 +1558,16 @@ define <4 x i32> @test_pmovzxwd(<8 x i16> %a0, <4 x i16> *%a1) { ; SANDY-LABEL: test_pmovzxwd: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [7:0.50] +; SANDY-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:0.50] ; SANDY-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmovzxwd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] -; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [1:1.00] +; HASWELL-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:1.00] ; HASWELL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmovzxwd: ; BTVER2: # BB#0: @@ -1602,16 +1602,16 @@ define <2 x i64> @test_pmovzxwq(<8 x i16> %a0, <2 x i16> *%a1) { ; SANDY-LABEL: test_pmovzxwq: ; SANDY: # BB#0: ; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:0.50] -; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [7:0.50] +; SANDY-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [5:0.50] ; SANDY-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: 
test_pmovzxwq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero sched: [1:1.00] -; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [1:1.00] +; HASWELL-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [5:1.00] ; HASWELL-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmovzxwq: ; BTVER2: # BB#0: @@ -1642,15 +1642,15 @@ define <2 x i64> @test_pmuldq(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SANDY-LABEL: test_pmuldq: ; SANDY: # BB#0: -; SANDY-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmuldq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmuldq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpmuldq (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmuldq: ; BTVER2: # BB#0: @@ -1680,15 +1680,15 @@ define <4 x i32> @test_pmulld(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SANDY-LABEL: test_pmulld: ; SANDY: # BB#0: -; SANDY-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmulld: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmulld %xmm1, %xmm0, %xmm0 # sched: [10:2.00] ; HASWELL-NEXT: vpmulld (%rdi), %xmm0, %xmm0 # sched: [10:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: 
test_pmulld: ; BTVER2: # BB#0: @@ -1724,23 +1724,23 @@ define i32 @test_ptest(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; ; SANDY-LABEL: test_ptest: ; SANDY: # BB#0: -; SANDY-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00] -; SANDY-NEXT: setb %al # sched: [1:1.00] -; SANDY-NEXT: vptest (%rdi), %xmm0 # sched: [8:1.00] -; SANDY-NEXT: setb %cl # sched: [1:1.00] +; SANDY-NEXT: vptest %xmm1, %xmm0 # sched: [1:0.33] +; SANDY-NEXT: setb %al # sched: [1:0.33] +; SANDY-NEXT: vptest (%rdi), %xmm0 # sched: [5:0.50] +; SANDY-NEXT: setb %cl # sched: [1:0.33] ; SANDY-NEXT: andb %al, %cl # sched: [1:0.33] ; SANDY-NEXT: movzbl %cl, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_ptest: ; HASWELL: # BB#0: ; HASWELL-NEXT: vptest %xmm1, %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: setb %al # sched: [1:1.00] +; HASWELL-NEXT: setb %al # sched: [1:0.50] ; HASWELL-NEXT: vptest (%rdi), %xmm0 # sched: [2:1.00] -; HASWELL-NEXT: setb %cl # sched: [1:1.00] +; HASWELL-NEXT: setb %cl # sched: [1:0.50] ; HASWELL-NEXT: andb %al, %cl # sched: [1:0.25] ; HASWELL-NEXT: movzbl %cl, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_ptest: ; BTVER2: # BB#0: @@ -1778,16 +1778,16 @@ define <2 x double> @test_roundpd(<2 x double> %a0, <2 x double> *%a1) { ; SANDY-LABEL: test_roundpd: ; SANDY: # BB#0: ; SANDY-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [9:1.00] +; SANDY-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_roundpd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [5:2.00] -; HASWELL-NEXT: vroundpd $7, (%rdi), %xmm1 # sched: [6:1.00] +; HASWELL-NEXT: vroundpd $7, %xmm0, %xmm0 # sched: [6:2.00] +; HASWELL-NEXT: 
vroundpd $7, (%rdi), %xmm1 # sched: [10:2.00] ; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_roundpd: ; BTVER2: # BB#0: @@ -1822,16 +1822,16 @@ define <4 x float> @test_roundps(<4 x float> %a0, <4 x float> *%a1) { ; SANDY-LABEL: test_roundps: ; SANDY: # BB#0: ; SANDY-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [9:1.00] +; SANDY-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [7:1.00] ; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_roundps: ; HASWELL: # BB#0: -; HASWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [5:2.00] -; HASWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [6:1.00] +; HASWELL-NEXT: vroundps $7, %xmm0, %xmm0 # sched: [6:2.00] +; HASWELL-NEXT: vroundps $7, (%rdi), %xmm1 # sched: [10:2.00] ; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_roundps: ; BTVER2: # BB#0: @@ -1867,16 +1867,16 @@ define <2 x double> @test_roundsd(<2 x double> %a0, <2 x double> %a1, <2 x doubl ; SANDY-LABEL: test_roundsd: ; SANDY: # BB#0: ; SANDY-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_roundsd: ; HASWELL: # BB#0: -; HASWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [5:2.00] -; HASWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; HASWELL-NEXT: vroundsd $7, %xmm1, %xmm0, %xmm1 # sched: [6:2.00] +; HASWELL-NEXT: vroundsd $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] ; 
HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_roundsd: ; BTVER2: # BB#0: @@ -1912,16 +1912,16 @@ define <4 x float> @test_roundss(<4 x float> %a0, <4 x float> %a1, <4 x float> * ; SANDY-LABEL: test_roundss: ; SANDY: # BB#0: ; SANDY-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; SANDY-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [7:1.00] ; SANDY-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_roundss: ; HASWELL: # BB#0: -; HASWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [3:1.00] -; HASWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [6:1.00] +; HASWELL-NEXT: vroundss $7, %xmm1, %xmm0, %xmm1 # sched: [6:2.00] +; HASWELL-NEXT: vroundss $7, (%rdi), %xmm0, %xmm0 # sched: [10:2.00] ; HASWELL-NEXT: vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_roundss: ; BTVER2: # BB#0: diff --git a/test/CodeGen/X86/sse42-schedule.ll b/test/CodeGen/X86/sse42-schedule.ll index c51c50a91bd..afc48bc57ee 100644 --- a/test/CodeGen/X86/sse42-schedule.ll +++ b/test/CodeGen/X86/sse42-schedule.ll @@ -26,16 +26,16 @@ define i32 @crc32_32_8(i32 %a0, i8 %a1, i8 *%a2) { ; SANDY-LABEL: crc32_32_8: ; SANDY: # BB#0: ; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] +; SANDY-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: crc32_32_8: ; HASWELL: # BB#0: ; HASWELL-NEXT: crc32b %sil, %edi # sched: [3:1.00] ; HASWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] 
-; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: crc32_32_8: ; BTVER2: # BB#0: @@ -68,16 +68,16 @@ define i32 @crc32_32_16(i32 %a0, i16 %a1, i16 *%a2) { ; SANDY-LABEL: crc32_32_16: ; SANDY: # BB#0: ; SANDY-NEXT: crc32w %si, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32w (%rdx), %edi # sched: [8:1.00] +; SANDY-NEXT: crc32w (%rdx), %edi # sched: [7:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: crc32_32_16: ; HASWELL: # BB#0: ; HASWELL-NEXT: crc32w %si, %edi # sched: [3:1.00] ; HASWELL-NEXT: crc32w (%rdx), %edi # sched: [7:1.00] ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: crc32_32_16: ; BTVER2: # BB#0: @@ -112,14 +112,14 @@ define i32 @crc32_32_32(i32 %a0, i32 %a1, i32 *%a2) { ; SANDY-NEXT: crc32l %esi, %edi # sched: [3:1.00] ; SANDY-NEXT: crc32l (%rdx), %edi # sched: [7:1.00] ; SANDY-NEXT: movl %edi, %eax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: crc32_32_32: ; HASWELL: # BB#0: ; HASWELL-NEXT: crc32l %esi, %edi # sched: [3:1.00] ; HASWELL-NEXT: crc32l (%rdx), %edi # sched: [7:1.00] ; HASWELL-NEXT: movl %edi, %eax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: crc32_32_32: ; BTVER2: # BB#0: @@ -152,16 +152,16 @@ define i64 @crc32_64_8(i64 %a0, i8 %a1, i8 *%a2) nounwind { ; SANDY-LABEL: crc32_64_8: ; SANDY: # BB#0: ; SANDY-NEXT: crc32b %sil, %edi # sched: [3:1.00] -; SANDY-NEXT: crc32b (%rdx), %edi # sched: [8:1.00] +; SANDY-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] ; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: crc32_64_8: ; HASWELL: # BB#0: ; HASWELL-NEXT: crc32b %sil, %edi # sched: 
[3:1.00] ; HASWELL-NEXT: crc32b (%rdx), %edi # sched: [7:1.00] ; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: crc32_64_8: ; BTVER2: # BB#0: @@ -196,14 +196,14 @@ define i64 @crc32_64_64(i64 %a0, i64 %a1, i64 *%a2) { ; SANDY-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] ; SANDY-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00] ; SANDY-NEXT: movq %rdi, %rax # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: crc32_64_64: ; HASWELL: # BB#0: ; HASWELL-NEXT: crc32q %rsi, %rdi # sched: [3:1.00] ; HASWELL-NEXT: crc32q (%rdx), %rdi # sched: [7:1.00] ; HASWELL-NEXT: movq %rdi, %rax # sched: [1:0.25] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: crc32_64_64: ; BTVER2: # BB#0: @@ -256,20 +256,20 @@ define i32 @test_pcmpestri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [4:2.33] ; SANDY-NEXT: # kill: %ECX %ECX %RCX ; SANDY-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pcmpestri: ; HASWELL: # BB#0: ; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25] ; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25] -; HASWELL-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [18:4.00] +; HASWELL-NEXT: vpcmpestri $7, %xmm1, %xmm0 # sched: [11:3.00] ; HASWELL-NEXT: movl %ecx, %esi # sched: [1:0.25] ; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25] ; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25] -; HASWELL-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [18:4.00] +; HASWELL-NEXT: vpcmpestri $7, (%rdi), %xmm0 # sched: [11:3.00] ; HASWELL-NEXT: # kill: %ECX %ECX %RCX ; HASWELL-NEXT: leal (%rcx,%rsi), %eax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pcmpestri: ; BTVER2: # BB#0: @@ -320,17 
+320,17 @@ define <16 x i8> @test_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-NEXT: movl $7, %eax # sched: [1:0.33] ; SANDY-NEXT: movl $7, %edx # sched: [1:0.33] ; SANDY-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [11:2.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pcmpestrm: ; HASWELL: # BB#0: ; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25] ; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25] -; HASWELL-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [19:4.00] +; HASWELL-NEXT: vpcmpestrm $7, %xmm1, %xmm0 # sched: [10:4.00] ; HASWELL-NEXT: movl $7, %eax # sched: [1:0.25] ; HASWELL-NEXT: movl $7, %edx # sched: [1:0.25] -; HASWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [19:4.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpcmpestrm $7, (%rdi), %xmm0 # sched: [10:3.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pcmpestrm: ; BTVER2: # BB#0: @@ -369,12 +369,12 @@ define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; ; SANDY-LABEL: test_pcmpistri: ; SANDY: # BB#0: -; SANDY-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [11:3.00] +; SANDY-NEXT: vpcmpistri $7, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: movl %ecx, %eax # sched: [1:0.33] -; SANDY-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [17:3.00] +; SANDY-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [3:1.00] ; SANDY-NEXT: # kill: %ECX %ECX %RCX ; SANDY-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pcmpistri: ; HASWELL: # BB#0: @@ -383,7 +383,7 @@ define i32 @test_pcmpistri(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; HASWELL-NEXT: vpcmpistri $7, (%rdi), %xmm0 # sched: [11:3.00] ; HASWELL-NEXT: # kill: %ECX %ECX %RCX ; HASWELL-NEXT: leal (%rcx,%rax), %eax # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pcmpistri: ; 
BTVER2: # BB#0: @@ -416,15 +416,15 @@ define <16 x i8> @test_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; ; SANDY-LABEL: test_pcmpistrm: ; SANDY: # BB#0: -; SANDY-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00] -; SANDY-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [17:3.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:1.00] +; SANDY-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [11:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pcmpistrm: ; HASWELL: # BB#0: -; HASWELL-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [11:3.00] -; HASWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [11:3.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpcmpistrm $7, %xmm1, %xmm0 # sched: [10:3.00] +; HASWELL-NEXT: vpcmpistrm $7, (%rdi), %xmm0 # sched: [10:3.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pcmpistrm: ; BTVER2: # BB#0: @@ -453,15 +453,15 @@ define <2 x i64> @test_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> *%a2) { ; ; SANDY-LABEL: test_pcmpgtq: ; SANDY: # BB#0: -; SANDY-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; SANDY-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [11:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pcmpgtq: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; HASWELL-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pcmpgtq: ; BTVER2: # BB#0: diff --git a/test/CodeGen/X86/ssse3-schedule.ll b/test/CodeGen/X86/ssse3-schedule.ll index 353784e6dc4..8b7a0c0ec02 100644 --- a/test/CodeGen/X86/ssse3-schedule.ll +++ b/test/CodeGen/X86/ssse3-schedule.ll @@ -35,16 +35,16 @@ define <16 x i8> @test_pabsb(<16 x i8> %a0, <16 x 
i8> *%a1) { ; SANDY-LABEL: test_pabsb: ; SANDY: # BB#0: ; SANDY-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpabsb (%rdi), %xmm1 # sched: [7:0.50] +; SANDY-NEXT: vpabsb (%rdi), %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pabsb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpabsb %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpabsb (%rdi), %xmm1 # sched: [1:0.50] +; HASWELL-NEXT: vpabsb (%rdi), %xmm1 # sched: [5:0.50] ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pabsb: ; BTVER2: # BB#0: @@ -86,16 +86,16 @@ define <4 x i32> @test_pabsd(<4 x i32> %a0, <4 x i32> *%a1) { ; SANDY-LABEL: test_pabsd: ; SANDY: # BB#0: ; SANDY-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpabsd (%rdi), %xmm1 # sched: [7:0.50] +; SANDY-NEXT: vpabsd (%rdi), %xmm1 # sched: [5:0.50] ; SANDY-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pabsd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpabsd %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpabsd (%rdi), %xmm1 # sched: [1:0.50] +; HASWELL-NEXT: vpabsd (%rdi), %xmm1 # sched: [5:0.50] ; HASWELL-NEXT: vpor %xmm1, %xmm0, %xmm0 # sched: [1:0.33] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pabsd: ; BTVER2: # BB#0: @@ -136,12 +136,12 @@ define <8 x i16> @test_pabsw(<8 x i16> %a0, <8 x i16> *%a1) { ; SANDY-LABEL: test_pabsw: ; SANDY: # BB#0: ; SANDY-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pabsw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpabsw %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # 
sched: [1:1.00] ; ; BTVER2-LABEL: test_pabsw: ; BTVER2: # BB#0: @@ -182,14 +182,14 @@ define <8 x i16> @test_palignr(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; SANDY-LABEL: test_palignr: ; SANDY: # BB#0: ; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:0.50] -; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_palignr: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5] sched: [1:1.00] -; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpalignr {{.*#+}} xmm0 = mem[14,15],xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_palignr: ; BTVER2: # BB#0: @@ -223,15 +223,15 @@ define <4 x i32> @test_phaddd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SANDY-LABEL: test_phaddd: ; SANDY: # BB#0: -; SANDY-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:1.50] -; SANDY-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [9:1.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_phaddd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vphaddd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vphaddd (%rdi), %xmm0, %xmm0 # sched: [6:2.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_phaddd: ; BTVER2: # BB#0: @@ -274,15 +274,15 @@ define <8 x i16> @test_phaddsw(<8 
x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_phaddsw: ; SANDY: # BB#0: -; SANDY-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] -; SANDY-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_phaddsw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vphaddsw (%rdi), %xmm0, %xmm0 # sched: [6:2.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_phaddsw: ; BTVER2: # BB#0: @@ -317,15 +317,15 @@ define <8 x i16> @test_phaddw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_phaddw: ; SANDY: # BB#0: -; SANDY-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] -; SANDY-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_phaddw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vphaddw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vphaddw (%rdi), %xmm0, %xmm0 # sched: [6:2.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_phaddw: ; BTVER2: # BB#0: @@ -360,15 +360,15 @@ define <4 x i32> @test_phsubd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; ; SANDY-LABEL: test_phsubd: ; SANDY: # BB#0: -; SANDY-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:1.50] -; SANDY-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [9:1.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: 
[1:0.50] +; SANDY-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_phsubd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vphsubd %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vphsubd (%rdi), %xmm0, %xmm0 # sched: [6:2.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_phsubd: ; BTVER2: # BB#0: @@ -411,15 +411,15 @@ define <8 x i16> @test_phsubsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_phsubsw: ; SANDY: # BB#0: -; SANDY-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] -; SANDY-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_phsubsw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vphsubsw (%rdi), %xmm0, %xmm0 # sched: [6:2.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_phsubsw: ; BTVER2: # BB#0: @@ -454,15 +454,15 @@ define <8 x i16> @test_phsubw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_phsubw: ; SANDY: # BB#0: -; SANDY-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:1.50] -; SANDY-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [9:1.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] +; SANDY-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_phsubw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vphsubw %xmm1, %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [3:2.00] -; HASWELL-NEXT: retq # 
sched: [2:1.00] +; HASWELL-NEXT: vphsubw (%rdi), %xmm0, %xmm0 # sched: [6:2.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_phsubw: ; BTVER2: # BB#0: @@ -497,15 +497,15 @@ define <8 x i16> @test_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; ; SANDY-LABEL: test_pmaddubsw: ; SANDY: # BB#0: -; SANDY-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] ; SANDY-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmaddubsw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpmaddubsw (%rdi), %xmm0, %xmm0 # sched: [9:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmaddubsw: ; BTVER2: # BB#0: @@ -538,13 +538,13 @@ define <8 x i16> @test_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) { ; ; SANDY-LABEL: test_pmulhrsw: ; SANDY: # BB#0: -; SANDY-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [3:1.00] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_pmulhrsw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 # sched: [5:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pmulhrsw: ; BTVER2: # BB#0: @@ -579,14 +579,14 @@ define <16 x i8> @test_pshufb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_pshufb: ; SANDY: # BB#0: ; SANDY-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: 
test_pshufb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [1:1.00] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpshufb (%rdi), %xmm0, %xmm0 # sched: [5:1.00] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_pshufb: ; BTVER2: # BB#0: @@ -630,14 +630,14 @@ define <16 x i8> @test_psignb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> *%a2) { ; SANDY-LABEL: test_psignb: ; SANDY: # BB#0: ; SANDY-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psignb: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsignb %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpsignb (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_psignb: ; BTVER2: # BB#0: @@ -681,14 +681,14 @@ define <4 x i32> @test_psignd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> *%a2) { ; SANDY-LABEL: test_psignd: ; SANDY: # BB#0: ; SANDY-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psignd: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsignd %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpsignd (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_psignd: ; BTVER2: # BB#0: @@ -732,14 +732,14 @@ define <8 x i16> @test_psignw(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> *%a2) 
{ ; SANDY-LABEL: test_psignw: ; SANDY: # BB#0: ; SANDY-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; SANDY-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [7:0.50] -; SANDY-NEXT: retq # sched: [1:1.00] +; SANDY-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; SANDY-NEXT: retq # sched: [5:1.00] ; ; HASWELL-LABEL: test_psignw: ; HASWELL: # BB#0: ; HASWELL-NEXT: vpsignw %xmm1, %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [1:0.50] -; HASWELL-NEXT: retq # sched: [2:1.00] +; HASWELL-NEXT: vpsignw (%rdi), %xmm0, %xmm0 # sched: [5:0.50] +; HASWELL-NEXT: retq # sched: [1:1.00] ; ; BTVER2-LABEL: test_psignw: ; BTVER2: # BB#0: diff --git a/test/CodeGen/X86/vector-shift-ashr-512.ll b/test/CodeGen/X86/vector-shift-ashr-512.ll index a07d2dd90be..4d4b7f4e822 100644 --- a/test/CodeGen/X86/vector-shift-ashr-512.ll +++ b/test/CodeGen/X86/vector-shift-ashr-512.ll @@ -201,14 +201,14 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind { ; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm5 ; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm8 ; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm5, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] -; AVX512DQ-NEXT: vpsraw $4, %ymm5, %ymm9 -; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm9, %ymm5, %ymm3 ; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm5 ; AVX512DQ-NEXT: vpaddw %ymm8, %ymm8, %ymm9 ; AVX512DQ-NEXT: vpblendvb %ymm9, %ymm5, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpackuswb %ymm4, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = 
ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] +; AVX512DQ-NEXT: vpsraw $4, %ymm4, %ymm5 +; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm3 ; AVX512DQ-NEXT: vpsraw $2, %ymm3, %ymm4 ; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3 ; AVX512DQ-NEXT: vpsraw $1, %ymm3, %ymm4 @@ -328,14 +328,14 @@ define <64 x i8> @constant_shift_v64i8(<64 x i8> %a) nounwind { ; AVX512DQ-NEXT: vpsraw $2, %ymm0, %ymm5 ; AVX512DQ-NEXT: vpaddw %ymm2, %ymm2, %ymm8 ; AVX512DQ-NEXT: vpblendvb %ymm8, %ymm5, %ymm0, %ymm0 -; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] -; AVX512DQ-NEXT: vpsraw $4, %ymm5, %ymm9 -; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm9, %ymm5, %ymm3 ; AVX512DQ-NEXT: vpsraw $1, %ymm0, %ymm5 ; AVX512DQ-NEXT: vpaddw %ymm8, %ymm8, %ymm9 ; AVX512DQ-NEXT: vpblendvb %ymm9, %ymm5, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpsrlw $8, %ymm0, %ymm0 ; AVX512DQ-NEXT: vpackuswb %ymm4, %ymm0, %ymm0 +; AVX512DQ-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31] +; AVX512DQ-NEXT: vpsraw $4, %ymm4, %ymm5 +; AVX512DQ-NEXT: vpblendvb %ymm3, %ymm5, %ymm4, %ymm3 ; AVX512DQ-NEXT: vpsraw $2, %ymm3, %ymm4 ; AVX512DQ-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3 ; AVX512DQ-NEXT: vpsraw $1, %ymm3, %ymm4 diff --git 
a/test/CodeGen/X86/vector-shuffle-512-v32.ll b/test/CodeGen/X86/vector-shuffle-512-v32.ll index d6c4ff1255e..7a5c992bb82 100644 --- a/test/CodeGen/X86/vector-shuffle-512-v32.ll +++ b/test/CodeGen/X86/vector-shuffle-512-v32.ll @@ -68,13 +68,13 @@ define <32 x i16> @shuffle_v32i16_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_1 ; KNL-NEXT: vpshufb {{.*#+}} xmm4 = xmm1[8,9,12,13,12,13,10,11,0,1,4,5,4,5,0,1] ; KNL-NEXT: vpshufd {{.*#+}} xmm1 = xmm2[0,1,0,3] ; KNL-NEXT: vpshuflw {{.*#+}} xmm2 = xmm1[0,3,2,2,4,5,6,7] -; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 -; KNL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; KNL-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm1 +; KNL-NEXT: vextracti128 $1, %ymm0, %xmm5 +; KNL-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7] ; KNL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[12,13,10,11,8,9,14,15,4,5,2,3,2,3,6,7] -; KNL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[6,7,2,3,4,5,6,7,2,3,2,3,0,1,14,15] -; KNL-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm5 -; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; KNL-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm5[1],ymm0[2],ymm5[3],ymm0[4],ymm5[5],ymm0[6],ymm5[7],ymm0[8],ymm5[9],ymm0[10],ymm5[11],ymm0[12],ymm5[13],ymm0[14],ymm5[15] +; KNL-NEXT: vpshufb {{.*#+}} xmm5 = xmm5[6,7,2,3,4,5,6,7,2,3,2,3,0,1,14,15] +; KNL-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm0 +; KNL-NEXT: vpblendw {{.*#+}} ymm1 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] ; KNL-NEXT: vextracti128 $1, %ymm3, %xmm3 ; KNL-NEXT: vpbroadcastw %xmm3, %ymm3 ; KNL-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0] -- 2.50.0