From: Simon Pilgrim Date: Tue, 5 Dec 2017 19:21:28 +0000 (+0000) Subject: [X86][AVX512] Tag VPTRUNC/VPMOVSX/VPMOVZX instruction scheduler classes X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=952471eaa55401f25188e0c158fb59bbdd1586ce;p=llvm [X86][AVX512] Tag VPTRUNC/VPMOVSX/VPMOVZX instruction scheduler classes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319815 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index b074b0d710e..9a7da097647 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -8010,26 +8010,36 @@ defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", SSE_ALU_F64S, // Integer truncate and extend operations //------------------------------------------------- +let Sched = WriteShuffle256 in +def AVX512_EXTEND : OpndItins< + IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI +>; + +let Sched = WriteShuffle256 in +def AVX512_TRUNCATE : OpndItins< + IIC_SSE_PSHUF_RI, IIC_SSE_PSHUF_MI +>; + multiclass avx512_trunc_common opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo SrcInfo, X86VectorVTInfo DestInfo, - X86MemOperand x86memop> { + OpndItins itins, X86VectorVTInfo SrcInfo, + X86VectorVTInfo DestInfo, X86MemOperand x86memop> { let ExeDomain = DestInfo.ExeDomain in defm rr : AVX512_maskable, - EVEX, T8XS; + (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1))), + itins.rr>, EVEX, T8XS, Sched<[itins.Sched]>; let mayStore = 1, mayLoad = 1, hasSideEffects = 0, ExeDomain = DestInfo.ExeDomain in { def mr : AVX512XS8I, EVEX; + [], itins.rm>, EVEX, Sched<[itins.Sched.Folded]>; def mrk : AVX512XS8I, EVEX, EVEX_K; + [], itins.rm>, EVEX, EVEX_K, Sched<[itins.Sched.Folded]>; }//mayStore = 1, mayLoad = 1, hasSideEffects = 0 } @@ -8048,112 +8058,118 @@ multiclass avx512_trunc_mr_lowering opc, string OpcodeStr, SDNode OpNode, - AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128, + OpndItins itins, AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128, X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ, X86MemOperand x86memopZ128, X86MemOperand x86memopZ256, X86MemOperand x86memopZ, PatFrag truncFrag, PatFrag mtruncFrag, Predicate prd = HasAVX512>{ let Predicates = [HasVLX, prd] in { - defm Z128: avx512_trunc_common, + defm Z128: avx512_trunc_common, avx512_trunc_mr_lowering, EVEX_V128; - defm Z256: avx512_trunc_common, + defm Z256: avx512_trunc_common, avx512_trunc_mr_lowering, EVEX_V256; } let Predicates = [prd] in - defm Z: avx512_trunc_common, + defm Z: avx512_trunc_common, avx512_trunc_mr_lowering, EVEX_V512; } multiclass avx512_trunc_qb opc, string OpcodeStr, SDNode OpNode, - PatFrag StoreNode, PatFrag MaskedStoreNode> { - defm NAME: avx512_trunc { + defm NAME: avx512_trunc, EVEX_CD8<8, CD8VO>; } multiclass avx512_trunc_qw opc, string OpcodeStr, SDNode OpNode, - PatFrag StoreNode, PatFrag MaskedStoreNode> { - defm NAME: avx512_trunc { + defm NAME: avx512_trunc, EVEX_CD8<16, CD8VQ>; } multiclass avx512_trunc_qd opc, string OpcodeStr, SDNode OpNode, - PatFrag StoreNode, PatFrag MaskedStoreNode> { - defm NAME: avx512_trunc { + defm NAME: avx512_trunc, EVEX_CD8<32, CD8VH>; } multiclass avx512_trunc_db opc, string OpcodeStr, SDNode OpNode, - PatFrag StoreNode, PatFrag MaskedStoreNode> { - defm NAME: avx512_trunc { + defm NAME: avx512_trunc, EVEX_CD8<8, CD8VQ>; } multiclass avx512_trunc_dw opc, string OpcodeStr, SDNode OpNode, - PatFrag StoreNode, PatFrag MaskedStoreNode> { - defm NAME: avx512_trunc { + defm NAME: avx512_trunc, EVEX_CD8<16, CD8VH>; } multiclass avx512_trunc_wb opc, string OpcodeStr, SDNode OpNode, - PatFrag StoreNode, PatFrag MaskedStoreNode> { - defm NAME: avx512_trunc { + defm NAME: avx512_trunc, EVEX_CD8<16, CD8VH>; } -defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc, +defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc, AVX512_TRUNCATE, truncstorevi8, masked_truncstorevi8>; -defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, +defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs, AVX512_TRUNCATE, truncstore_s_vi8, masked_truncstore_s_vi8>; -defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, +defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus, AVX512_TRUNCATE, truncstore_us_vi8, masked_truncstore_us_vi8>; -defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc, +defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc, AVX512_TRUNCATE, truncstorevi16, masked_truncstorevi16>; -defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, +defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs, AVX512_TRUNCATE, truncstore_s_vi16, masked_truncstore_s_vi16>; -defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, +defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus, AVX512_TRUNCATE, truncstore_us_vi16, masked_truncstore_us_vi16>; -defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc, +defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc, AVX512_TRUNCATE, truncstorevi32, masked_truncstorevi32>; -defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, +defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs, AVX512_TRUNCATE, truncstore_s_vi32, masked_truncstore_s_vi32>; -defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, +defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus, AVX512_TRUNCATE, truncstore_us_vi32, masked_truncstore_us_vi32>; -defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc, +defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc, AVX512_TRUNCATE, truncstorevi8, masked_truncstorevi8>; -defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, +defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs, AVX512_TRUNCATE, truncstore_s_vi8, masked_truncstore_s_vi8>; -defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, +defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus, AVX512_TRUNCATE, truncstore_us_vi8, masked_truncstore_us_vi8>; -defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc, +defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc, AVX512_TRUNCATE, truncstorevi16, masked_truncstorevi16>; -defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, +defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs, AVX512_TRUNCATE, truncstore_s_vi16, masked_truncstore_s_vi16>; -defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, +defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus, AVX512_TRUNCATE, truncstore_us_vi16, masked_truncstore_us_vi16>; -defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc, +defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc, AVX512_TRUNCATE, truncstorevi8, masked_truncstorevi8>; -defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, +defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs, AVX512_TRUNCATE, truncstore_s_vi8, masked_truncstore_s_vi8>; -defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, +defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus, AVX512_TRUNCATE, truncstore_us_vi8, masked_truncstore_us_vi8>; let Predicates = [HasAVX512, NoVLX] in { @@ -8173,150 +8189,150 @@ def: Pat<(v16i8 (X86vtrunc (v16i16 VR256X:$src))), VR256X:$src, sub_ymm))), sub_xmm))>; } -multiclass avx512_extend_common opc, string OpcodeStr, +multiclass avx512_extend_common opc, string OpcodeStr, OpndItins itins, X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo, X86MemOperand x86memop, PatFrag LdFrag, SDPatternOperator OpNode>{ let ExeDomain = DestInfo.ExeDomain in { defm rr : AVX512_maskable, - EVEX; + (DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src))), itins.rr>, + EVEX, Sched<[itins.Sched]>; defm rm : AVX512_maskable, - EVEX; + (DestInfo.VT (LdFrag addr:$src)), itins.rm>, + EVEX, Sched<[itins.Sched.Folded]>; } } multiclass avx512_extend_BW opc, string OpcodeStr, - SDPatternOperator OpNode, SDPatternOperator InVecNode, - string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { + SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy, + OpndItins itins, PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { let Predicates = [HasVLX, HasBWI] in { - defm Z128: avx512_extend_common, EVEX_CD8<8, CD8VH>, T8PD, EVEX_V128, VEX_WIG; - defm Z256: avx512_extend_common, EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256, VEX_WIG; } let Predicates = [HasBWI] in { - defm Z : avx512_extend_common, EVEX_CD8<8, CD8VH>, T8PD, EVEX_V512, VEX_WIG; } } multiclass avx512_extend_BD opc, string OpcodeStr, - SDPatternOperator OpNode, SDPatternOperator InVecNode, - string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { + SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy, + OpndItins itins, PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { let Predicates = [HasVLX, HasAVX512] in { - defm Z128: avx512_extend_common, EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V128, VEX_WIG; - defm Z256: avx512_extend_common, EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256, VEX_WIG; } let Predicates = [HasAVX512] in { - defm Z : avx512_extend_common, EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V512, VEX_WIG; } } multiclass avx512_extend_BQ opc, string OpcodeStr, - SDPatternOperator OpNode, SDPatternOperator InVecNode, - string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { + SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy, + OpndItins itins, PatFrag LdFrag = !cast(ExtTy#"extloadvi8")> { let Predicates = [HasVLX, HasAVX512] in { - defm Z128: avx512_extend_common, EVEX_CD8<8, CD8VO>, T8PD, EVEX_V128, VEX_WIG; - defm Z256: avx512_extend_common, EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256, VEX_WIG; } let Predicates = [HasAVX512] in { - defm Z : avx512_extend_common, EVEX_CD8<8, CD8VO>, T8PD, EVEX_V512, VEX_WIG; } } multiclass avx512_extend_WD opc, string OpcodeStr, - SDPatternOperator OpNode, SDPatternOperator InVecNode, - string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi16")> { + SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy, + OpndItins itins, PatFrag LdFrag = !cast(ExtTy#"extloadvi16")> { let Predicates = [HasVLX, HasAVX512] in { - defm Z128: avx512_extend_common, EVEX_CD8<16, CD8VH>, T8PD, EVEX_V128, VEX_WIG; - defm Z256: avx512_extend_common, EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256, VEX_WIG; } let Predicates = [HasAVX512] in { - defm Z : avx512_extend_common, EVEX_CD8<16, CD8VH>, T8PD, EVEX_V512, VEX_WIG; } } multiclass avx512_extend_WQ opc, string OpcodeStr, - SDPatternOperator OpNode, SDPatternOperator InVecNode, - string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi16")> { + SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy, + OpndItins itins, PatFrag LdFrag = !cast(ExtTy#"extloadvi16")> { let Predicates = [HasVLX, HasAVX512] in { - defm Z128: avx512_extend_common, EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V128, VEX_WIG; - defm Z256: avx512_extend_common, EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256, VEX_WIG; } let Predicates = [HasAVX512] in { - defm Z : avx512_extend_common, EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V512, VEX_WIG; } } multiclass avx512_extend_DQ opc, string OpcodeStr, - SDPatternOperator OpNode, SDPatternOperator InVecNode, - string ExtTy,PatFrag LdFrag = !cast(ExtTy#"extloadvi32")> { + SDPatternOperator OpNode, SDPatternOperator InVecNode, string ExtTy, + OpndItins itins, PatFrag LdFrag = !cast(ExtTy#"extloadvi32")> { let Predicates = [HasVLX, HasAVX512] in { - defm Z128: avx512_extend_common, EVEX_CD8<32, CD8VH>, T8PD, EVEX_V128; - defm Z256: avx512_extend_common, EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256; } let Predicates = [HasAVX512] in { - defm Z : avx512_extend_common, EVEX_CD8<32, CD8VH>, T8PD, EVEX_V512; } } -defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z">; -defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z">; -defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z">; -defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z">; -defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z">; -defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z">; +defm VPMOVZXBW : avx512_extend_BW<0x30, "vpmovzxbw", X86vzext, zext_invec, "z", AVX512_EXTEND>; +defm VPMOVZXBD : avx512_extend_BD<0x31, "vpmovzxbd", X86vzext, zext_invec, "z", AVX512_EXTEND>; +defm VPMOVZXBQ : avx512_extend_BQ<0x32, "vpmovzxbq", X86vzext, zext_invec, "z", AVX512_EXTEND>; +defm VPMOVZXWD : avx512_extend_WD<0x33, "vpmovzxwd", X86vzext, zext_invec, "z", AVX512_EXTEND>; +defm VPMOVZXWQ : avx512_extend_WQ<0x34, "vpmovzxwq", X86vzext, zext_invec, "z", AVX512_EXTEND>; +defm VPMOVZXDQ : avx512_extend_DQ<0x35, "vpmovzxdq", X86vzext, zext_invec, "z", AVX512_EXTEND>; -defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s">; -defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s">; -defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s">; -defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s">; -defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s">; -defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s">; +defm VPMOVSXBW: avx512_extend_BW<0x20, "vpmovsxbw", X86vsext, sext_invec, "s", AVX512_EXTEND>; +defm VPMOVSXBD: avx512_extend_BD<0x21, "vpmovsxbd", X86vsext, sext_invec, "s", AVX512_EXTEND>; +defm VPMOVSXBQ: avx512_extend_BQ<0x22, "vpmovsxbq", X86vsext, sext_invec, "s", AVX512_EXTEND>; +defm VPMOVSXWD: avx512_extend_WD<0x23, "vpmovsxwd", X86vsext, sext_invec, "s", AVX512_EXTEND>; +defm VPMOVSXWQ: avx512_extend_WQ<0x24, "vpmovsxwq", X86vsext, sext_invec, "s", AVX512_EXTEND>; +defm VPMOVSXDQ: avx512_extend_DQ<0x25, "vpmovsxdq", X86vsext, sext_invec, "s", AVX512_EXTEND>; multiclass AVX512_pmovx_patterns @f32to16uc(<16 x float> %f) { ; GENERIC-LABEL: f32to16uc: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: vpmovdb %zmm0, %xmm0 +; GENERIC-NEXT: vpmovdb %zmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vzeroupper ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1597,7 +1597,7 @@ define <16 x i16> @f32to16us(<16 x float> %f) { ; GENERIC-LABEL: f32to16us: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vcvttps2dq %zmm0, %zmm0 # sched: [3:1.00] -; GENERIC-NEXT: vpmovdw %zmm0, %ymm0 +; GENERIC-NEXT: vpmovdw %zmm0, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: f32to16us: @@ -1655,7 +1655,7 @@ define <8 x i16> @f64to8us(<8 x double> %f) { ; GENERIC-LABEL: f64to8us: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpmovdw %ymm0, %xmm0 +; GENERIC-NEXT: vpmovdw %ymm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vzeroupper ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -1673,7 +1673,7 @@ define <8 x i8> @f64to8uc(<8 x double> %f) { ; GENERIC-LABEL: f64to8uc: ; GENERIC: # %bb.0: ; GENERIC-NEXT: vcvttpd2dq %zmm0, %ymm0 # sched: [3:1.00] -; GENERIC-NEXT: vpmovdw %ymm0, %xmm0 +; GENERIC-NEXT: vpmovdw %ymm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vzeroupper ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2380,7 +2380,7 @@ define <16 x float> @sbto16f32(<16 x i32> %a) { define <16 x float> @scto16f32(<16 x i8> %a) { ; GENERIC-LABEL: scto16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 +; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2396,7 +2396,7 @@ define <16 x float> @scto16f32(<16 x i8> %a) { define <16 x float> @ssto16f32(<16 x i16> %a) { ; GENERIC-LABEL: ssto16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm0 +; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2448,7 +2448,7 @@ define <8 x double> @scto8f64(<8 x i8> %a) { define <16 x double> @scto16f64(<16 x i8> %a) { ; GENERIC-LABEL: scto16f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm1 +; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm1 # sched: [1:1.00] ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] ; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] @@ -2621,7 +2621,7 @@ define <2 x double> @sbto2f64(<2 x double> %a) { define <16 x float> @ucto16f32(<16 x i8> %a) { ; GENERIC-LABEL: ucto16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00] ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2655,7 +2655,7 @@ define <8 x double> @ucto8f64(<8 x i8> %a) { define <16 x float> @swto16f32(<16 x i16> %a) { ; GENERIC-LABEL: swto16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm0 +; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2687,7 +2687,7 @@ define <8 x double> @swto8f64(<8 x i16> %a) { define <16 x double> @swto16f64(<16 x i16> %a) { ; GENERIC-LABEL: swto16f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm1 +; GENERIC-NEXT: vpmovsxwd %ymm0, %zmm1 # sched: [1:1.00] ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] ; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] @@ -2707,7 +2707,7 @@ define <16 x double> @swto16f64(<16 x i16> %a) { define <16 x double> @ucto16f64(<16 x i8> %a) { ; GENERIC-LABEL: ucto16f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00] ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] ; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] @@ -2727,7 +2727,7 @@ define <16 x double> @ucto16f64(<16 x i8> %a) { define <16 x float> @uwto16f32(<16 x i16> %a) { ; GENERIC-LABEL: uwto16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00] ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -2759,7 +2759,7 @@ define <8 x double> @uwto8f64(<8 x i16> %a) { define <16 x double> @uwto16f64(<16 x i16> %a) { ; GENERIC-LABEL: uwto16f64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00] ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm0 # sched: [4:1.00] ; GENERIC-NEXT: vextracti64x4 $1, %zmm1, %ymm1 # sched: [1:1.00] ; GENERIC-NEXT: vcvtdq2pd %ymm1, %zmm1 # sched: [4:1.00] @@ -2813,7 +2813,7 @@ define <16 x double> @sito16f64(<16 x i32> %a) { define <16 x float> @usto16f32(<16 x i16> %a) { ; GENERIC-LABEL: usto16f32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00] ; GENERIC-NEXT: vcvtdq2ps %zmm0, %zmm0 # sched: [4:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -3011,7 +3011,7 @@ define <8 x i16> @zext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 -; GENERIC-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; GENERIC-NEXT: vpmovzxbw {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_8x8mem_to_8x16: @@ -3031,7 +3031,7 @@ define <8 x i16> @sext_8x8mem_to_8x16(<8 x i8> *%i , <8 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 -; GENERIC-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} +; GENERIC-NEXT: vpmovsxbw (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x8mem_to_8x16: @@ -3052,7 +3052,7 @@ define <16 x i16> @zext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwi ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 -; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero +; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_16x8mem_to_16x16: @@ -3072,7 +3072,7 @@ define <16 x i16> @sext_16x8mem_to_16x16(<16 x i8> *%i , <16 x i1> %mask) nounwi ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 -; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} +; GENERIC-NEXT: vpmovsxbw (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_16x8mem_to_16x16: @@ -3106,7 +3106,7 @@ define <16 x i16> @zext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwi ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm1, %k1 -; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_16x8_to_16x16_mask: @@ -3139,7 +3139,7 @@ define <16 x i16> @sext_16x8_to_16x16_mask(<16 x i8> %a ,<16 x i1> %mask) nounwi ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm1, %k1 -; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} +; GENERIC-NEXT: vpmovsxbw %xmm0, %ymm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_16x8_to_16x16_mask: @@ -3158,7 +3158,7 @@ define <32 x i16> @zext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwi ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %ymm0, %k1 -; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero +; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero,mem[16],zero,mem[17],zero,mem[18],zero,mem[19],zero,mem[20],zero,mem[21],zero,mem[22],zero,mem[23],zero,mem[24],zero,mem[25],zero,mem[26],zero,mem[27],zero,mem[28],zero,mem[29],zero,mem[30],zero,mem[31],zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_32x8mem_to_32x16: @@ -3178,7 +3178,7 @@ define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwi ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %ymm0, %ymm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %ymm0, %k1 -; GENERIC-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} +; GENERIC-NEXT: vpmovsxbw (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_32x8mem_to_32x16: @@ -3196,7 +3196,7 @@ define <32 x i16> @sext_32x8mem_to_32x16(<32 x i8> *%i , <32 x i1> %mask) nounwi define <32 x i16> @zext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone { ; GENERIC-LABEL: zext_32x8_to_32x16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero +; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_32x8_to_32x16: @@ -3212,7 +3212,7 @@ define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwi ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %ymm1, %k1 -; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero +; GENERIC-NEXT: vpmovzxbw {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_32x8_to_32x16_mask: @@ -3229,7 +3229,7 @@ define <32 x i16> @zext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwi define <32 x i16> @sext_32x8_to_32x16(<32 x i8> %a ) nounwind readnone { ; GENERIC-LABEL: sext_32x8_to_32x16: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxbw %ymm0, %zmm0 +; GENERIC-NEXT: vpmovsxbw %ymm0, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_32x8_to_32x16: @@ -3245,7 +3245,7 @@ define <32 x i16> @sext_32x8_to_32x16_mask(<32 x i8> %a ,<32 x i1> %mask) nounwi ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %ymm1, %ymm1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %ymm1, %k1 -; GENERIC-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} +; GENERIC-NEXT: vpmovsxbw %ymm0, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_32x8_to_32x16_mask: @@ -3264,7 +3264,7 @@ define <4 x i32> @zext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero +; GENERIC-NEXT: vpmovzxbd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_4x8mem_to_4x32: @@ -3284,7 +3284,7 @@ define <4 x i32> @sext_4x8mem_to_4x32(<4 x i8> *%i , <4 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} +; GENERIC-NEXT: vpmovsxbd (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x8mem_to_4x32: @@ -3304,7 +3304,7 @@ define <8 x i32> @zext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 -; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero +; GENERIC-NEXT: vpmovzxbd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_8x8mem_to_8x32: @@ -3324,7 +3324,7 @@ define <8 x i32> @sext_8x8mem_to_8x32(<8 x i8> *%i , <8 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 -; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} +; GENERIC-NEXT: vpmovsxbd (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x8mem_to_8x32: @@ -3344,7 +3344,7 @@ define <16 x i32> @zext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwi ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 -; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero +; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero,mem[8],zero,zero,zero,mem[9],zero,zero,zero,mem[10],zero,zero,zero,mem[11],zero,zero,zero,mem[12],zero,zero,zero,mem[13],zero,zero,zero,mem[14],zero,zero,zero,mem[15],zero,zero,zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_16x8mem_to_16x32: @@ -3364,7 +3364,7 @@ define <16 x i32> @sext_16x8mem_to_16x32(<16 x i8> *%i , <16 x i1> %mask) nounwi ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 -; GENERIC-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} +; GENERIC-NEXT: vpmovsxbd (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_16x8mem_to_16x32: @@ -3384,7 +3384,7 @@ define <16 x i32> @zext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm1, %k1 -; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_16x8_to_16x32_mask: @@ -3403,7 +3403,7 @@ define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm1, %k1 -; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} +; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 {%k1} {z} # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_16x8_to_16x32_mask: @@ -3420,7 +3420,7 @@ define <16 x i32> @sext_16x8_to_16x32_mask(<16 x i8> %a , <16 x i1> %mask) nounw define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { ; GENERIC-LABEL: zext_16x8_to_16x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero +; GENERIC-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_16x8_to_16x32: @@ -3434,7 +3434,7 @@ define <16 x i32> @zext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { define <16 x i32> @sext_16x8_to_16x32(<16 x i8> %i) nounwind readnone { ; GENERIC-LABEL: sext_16x8_to_16x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 +; GENERIC-NEXT: vpmovsxbd %xmm0, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_16x8_to_16x32: @@ -3450,7 +3450,7 @@ define <2 x i64> @zext_2x8mem_to_2x64(<2 x i8> *%i , <2 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero +; GENERIC-NEXT: vpmovzxbq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_2x8mem_to_2x64: @@ -3469,7 +3469,7 @@ define <2 x i64> @sext_2x8mem_to_2x64mask(<2 x i8> *%i , <2 x i1> %mask) nounwin ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} +; GENERIC-NEXT: vpmovsxbq (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_2x8mem_to_2x64mask: @@ -3503,7 +3503,7 @@ define <4 x i64> @zext_4x8mem_to_4x64(<4 x i8> *%i , <4 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero +; GENERIC-NEXT: vpmovzxbq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_4x8mem_to_4x64: @@ -3523,7 +3523,7 @@ define <4 x i64> @sext_4x8mem_to_4x64mask(<4 x i8> *%i , <4 x i1> %mask) nounwin ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} +; GENERIC-NEXT: vpmovsxbq (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x8mem_to_4x64mask: @@ -3558,7 +3558,7 @@ define <8 x i64> @zext_8x8mem_to_8x64(<8 x i8> *%i , <8 x i1> %mask) nounwind re ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 -; GENERIC-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero +; GENERIC-NEXT: vpmovzxbq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_8x8mem_to_8x64: @@ -3578,7 +3578,7 @@ define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwin ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 -; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} +; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x8mem_to_8x64mask: @@ -3596,7 +3596,7 @@ define <8 x i64> @sext_8x8mem_to_8x64mask(<8 x i8> *%i , <8 x i1> %mask) nounwin define <8 x i64> @sext_8x8mem_to_8x64(<8 x i8> *%i) nounwind readnone { ; GENERIC-LABEL: sext_8x8mem_to_8x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 +; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x8mem_to_8x64: @@ -3613,7 +3613,7 @@ define <4 x i32> @zext_4x16mem_to_4x32(<4 x i16> *%i , <4 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; GENERIC-NEXT: vpmovzxwd {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_4x16mem_to_4x32: @@ -3633,7 +3633,7 @@ define <4 x i32> @sext_4x16mem_to_4x32mask(<4 x i16> *%i , <4 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} +; GENERIC-NEXT: vpmovsxwd (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x16mem_to_4x32mask: @@ -3669,7 +3669,7 @@ define <8 x i32> @zext_8x16mem_to_8x32(<8 x i16> *%i , <8 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 -; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_8x16mem_to_8x32: @@ -3689,7 +3689,7 @@ define <8 x i32> @sext_8x16mem_to_8x32mask(<8 x i16> *%i , <8 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 -; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} +; GENERIC-NEXT: vpmovsxwd (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x16mem_to_8x32mask: @@ -3724,7 +3724,7 @@ define <8 x i32> @zext_8x16_to_8x32mask(<8 x i16> %a , <8 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm1, %k1 -; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero +; GENERIC-NEXT: vpmovzxwd {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_8x16_to_8x32mask: @@ -3757,7 +3757,7 @@ define <16 x i32> @zext_16x16mem_to_16x32(<16 x i16> *%i , <16 x i1> %mask) noun ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 -; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero +; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_16x16mem_to_16x32: @@ -3777,7 +3777,7 @@ define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm0, %k1 -; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} +; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_16x16mem_to_16x32mask: @@ -3795,7 +3795,7 @@ define <16 x i32> @sext_16x16mem_to_16x32mask(<16 x i16> *%i , <16 x i1> %mask) define <16 x i32> @sext_16x16mem_to_16x32(<16 x i16> *%i) nounwind readnone { ; GENERIC-LABEL: sext_16x16mem_to_16x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 +; GENERIC-NEXT: vpmovsxwd (%rdi), %zmm0 # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_16x16mem_to_16x32: @@ -3811,7 +3811,7 @@ define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) noun ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm1, %k1 -; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_16x16_to_16x32mask: @@ -3828,7 +3828,7 @@ define <16 x i32> @zext_16x16_to_16x32mask(<16 x i16> %a , <16 x i1> %mask) noun define <16 x i32> @zext_16x16_to_16x32(<16 x i16> %a ) nounwind readnone { ; GENERIC-LABEL: zext_16x16_to_16x32: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; GENERIC-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_16x16_to_16x32: @@ -3844,7 +3844,7 @@ define <2 x i64> @zext_2x16mem_to_2x64(<2 x i16> *%i , <2 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero +; GENERIC-NEXT: vpmovzxwq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_2x16mem_to_2x64: @@ -3864,7 +3864,7 @@ define <2 x i64> @sext_2x16mem_to_2x64mask(<2 x i16> *%i , <2 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} +; GENERIC-NEXT: vpmovsxwq (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_2x16mem_to_2x64mask: @@ -3899,7 +3899,7 @@ define <4 x i64> @zext_4x16mem_to_4x64(<4 x i16> *%i , <4 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero +; GENERIC-NEXT: vpmovzxwq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_4x16mem_to_4x64: @@ -3919,7 +3919,7 @@ define <4 x i64> @sext_4x16mem_to_4x64mask(<4 x i16> *%i , <4 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} +; GENERIC-NEXT: vpmovsxwq (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x16mem_to_4x64mask: @@ -3954,7 +3954,7 @@ define <8 x i64> @zext_8x16mem_to_8x64(<8 x i16> *%i , <8 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 -; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero +; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_8x16mem_to_8x64: @@ -3974,7 +3974,7 @@ define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 -; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} +; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x16mem_to_8x64mask: @@ -3992,7 +3992,7 @@ define <8 x i64> @sext_8x16mem_to_8x64mask(<8 x i16> *%i , <8 x i1> %mask) nounw define <8 x i64> @sext_8x16mem_to_8x64(<8 x i16> *%i) nounwind readnone { ; GENERIC-LABEL: sext_8x16mem_to_8x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 +; GENERIC-NEXT: vpmovsxwq (%rdi), %zmm0 # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x16mem_to_8x64: @@ -4009,7 +4009,7 @@ define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm1, %k1 -; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_8x16_to_8x64mask: @@ -4026,7 +4026,7 @@ define <8 x i64> @zext_8x16_to_8x64mask(<8 x i16> %a , <8 x i1> %mask) nounwind define <8 x i64> @zext_8x16_to_8x64(<8 x i16> %a) nounwind readnone { ; GENERIC-LABEL: zext_8x16_to_8x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero +; GENERIC-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_8x16_to_8x64: @@ -4042,7 +4042,7 @@ define <2 x i64> @zext_2x32mem_to_2x64(<2 x i32> *%i , <2 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero +; GENERIC-NEXT: vpmovzxdq {{.*#+}} xmm0 {%k1} {z} = mem[0],zero,mem[1],zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_2x32mem_to_2x64: @@ -4062,7 +4062,7 @@ define <2 x i64> @sext_2x32mem_to_2x64mask(<2 x i32> *%i , <2 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllq $63, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vptestmq %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} +; GENERIC-NEXT: vpmovsxdq (%rdi), %xmm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_2x32mem_to_2x64mask: @@ -4097,7 +4097,7 @@ define <4 x i64> @zext_4x32mem_to_4x64(<4 x i32> *%i , <4 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero +; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_4x32mem_to_4x64: @@ -4117,7 +4117,7 @@ define <4 x i64> @sext_4x32mem_to_4x64mask(<4 x i32> *%i , <4 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vptestmd %xmm0, %xmm0, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} +; GENERIC-NEXT: vpmovsxdq (%rdi), %ymm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_4x32mem_to_4x64mask: @@ -4166,7 +4166,7 @@ define <4 x i64> @zext_4x32_to_4x64mask(<4 x i32> %a , <4 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpslld $31, %xmm1, %xmm1 # sched: [1:1.00] ; GENERIC-NEXT: vptestmd %xmm1, %xmm1, %k1 # sched: [1:1.00] -; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; GENERIC-NEXT: vpmovzxdq {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_4x32_to_4x64mask: @@ -4185,7 +4185,7 @@ define <8 x i64> @zext_8x32mem_to_8x64(<8 x i32> *%i , <8 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 -; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero +; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_8x32mem_to_8x64: @@ -4205,7 +4205,7 @@ define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounw ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm0, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm0, %k1 -; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} +; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 {%k1} {z} # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x32mem_to_8x64mask: @@ -4223,7 +4223,7 @@ define <8 x i64> @sext_8x32mem_to_8x64mask(<8 x i32> *%i , <8 x i1> %mask) nounw define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone { ; GENERIC-LABEL: sext_8x32mem_to_8x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 +; GENERIC-NEXT: vpmovsxdq (%rdi), %zmm0 # sched: [5:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x32mem_to_8x64: @@ -4238,7 +4238,7 @@ define <8 x i64> @sext_8x32mem_to_8x64(<8 x i32> *%i) nounwind readnone { define <8 x i64> @sext_8x32_to_8x64(<8 x i32> %a) nounwind readnone { ; GENERIC-LABEL: sext_8x32_to_8x64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxdq %ymm0, %zmm0 +; GENERIC-NEXT: vpmovsxdq %ymm0, %zmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: sext_8x32_to_8x64: @@ -4254,7 +4254,7 @@ define <8 x i64> @zext_8x32_to_8x64mask(<8 x i32> %a , <8 x i1> %mask) nounwind ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $15, %xmm1, %xmm1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovw2m %xmm1, %k1 -; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero +; GENERIC-NEXT: vpmovzxdq {{.*#+}} zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: zext_8x32_to_8x64mask: @@ -4527,7 +4527,7 @@ define <8 x i64> @sext_8i1_8i64(<8 x i32> %a1, <8 x i32> %a2) nounwind { define void @extload_v8i64(<8 x i8>* %a, <8 x i64>* %res) { ; GENERIC-LABEL: extload_v8i64: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 +; GENERIC-NEXT: vpmovsxbq (%rdi), %zmm0 # sched: [5:1.00] ; GENERIC-NEXT: vmovdqa64 %zmm0, (%rsi) ; GENERIC-NEXT: vzeroupper ; GENERIC-NEXT: retq # sched: [1:1.00] @@ -4586,7 +4586,7 @@ define <16 x i16> @shuffle_zext_16x8_to_16x16_mask(<16 x i8> %a, <16 x i1> %mask ; GENERIC: # %bb.0: ; GENERIC-NEXT: vpsllw $7, %xmm1, %xmm1 # sched: [1:1.00] ; GENERIC-NEXT: vpmovb2m %xmm1, %k1 -; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; GENERIC-NEXT: vpmovzxbw {{.*#+}} ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: shuffle_zext_16x8_to_16x16_mask: