From d854787edd6c4fa4307fc0a469c4e738e2d98660 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 19 Sep 2016 02:53:43 +0000 Subject: [PATCH] [X86,AVX-512] Use INSERT_SUBREG instead of SUBREG_TO_REG when the input is not the output of an instruction. SUBREG_TO_REG is supposed to indicate that the super register has been zeroed, but we can't prove that if we don't know where it came from. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@281885 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrAVX512.td | 64 +++++---- lib/Target/X86/X86InstrSSE.td | 12 +- test/CodeGen/X86/avx512-fsel.ll | 2 + test/CodeGen/X86/avx512-intrinsics.ll | 16 +-- test/CodeGen/X86/avx512bwvl-intrinsics.ll | 16 +-- test/CodeGen/X86/avx512dq-intrinsics.ll | 4 +- test/CodeGen/X86/avx512vl-intrinsics.ll | 160 +++++++++++----------- test/CodeGen/X86/pr27591.ll | 2 + 8 files changed, 146 insertions(+), 130 deletions(-) diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 11b52464e20..99fe48a3227 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -1421,15 +1421,15 @@ def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1), (v8f32 VR256X:$src2))), (EXTRACT_SUBREG (v16f32 (VBLENDMPSZrrk (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), - (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), - (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; + (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), + (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>; def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), (EXTRACT_SUBREG (v16i32 (VPBLENDMDZrrk (COPY_TO_REGCLASS VK8WM:$mask, VK16WM), - (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), - (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>; } //===----------------------------------------------------------------------===// // Compare Instructions @@ -1636,13 +1636,13 @@ defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm, let Predicates = [HasAVX512, NoVLX] in { def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), (COPY_TO_REGCLASS (VPCMPGTDZrr - (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)), - (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>; + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>; def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), (COPY_TO_REGCLASS (VPCMPEQDZrr - (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)), - (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>; + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>; } multiclass avx512_icmp_cc opc, string Suffix, SDNode OpNode, @@ -1897,18 +1897,18 @@ defm VCMPPS : avx512_vcmp, def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)), (COPY_TO_REGCLASS (VCMPPSZrri - (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)), - (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), + (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), imm:$cc), VK8)>; def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)), (COPY_TO_REGCLASS (VPCMPDZrri - (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)), - (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), imm:$cc), VK8)>; def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)), (COPY_TO_REGCLASS (VPCMPUDZrri - (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)), - (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)), + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)), + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)), imm:$cc), VK8)>; // ---------------------------------------------------------------- @@ -2097,7 +2097,7 @@ def : Pat<(i8 (bitconvert (v8i1 VK8:$src))), def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))), (KMOVWrk VK16:$src)>; def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))), - (i32 (SUBREG_TO_REG (i64 0), + (i32 (INSERT_SUBREG (IMPLICIT_DEF), (i16 (COPY_TO_REGCLASS VK16:$src, GR16)), sub_16bit))>; def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))), @@ -2105,7 +2105,7 @@ def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))), def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))), (KMOVBrk VK8:$src)>, Requires<[HasDQI]>; def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))), - (i32 (SUBREG_TO_REG (i64 0), + (i32 (INSERT_SUBREG (IMPLICIT_DEF), (i8 (COPY_TO_REGCLASS VK8:$src, GR8)), sub_8bit))>; def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), @@ -2194,12 +2194,14 @@ let Predicates = [HasAVX512] in { def : Pat<(i1 (trunc (i8 GR8:$src))), (COPY_TO_REGCLASS - (KMOVWkr (AND32ri8 (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit), (i32 1))), + (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), + GR8:$src, sub_8bit), (i32 1))), VK1)>; def : Pat<(i1 (trunc (i16 GR16:$src))), (COPY_TO_REGCLASS - (KMOVWkr (AND32ri8 (SUBREG_TO_REG (i32 0), $src, sub_16bit), (i32 1))), + (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), + GR16:$src, sub_16bit), (i32 1))), VK1)>; def : Pat<(i32 (zext VK1:$src)), @@ -2221,7 +2223,7 @@ let Predicates = [HasAVX512] in { (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit), (i64 1))>; def : Pat<(i64 (anyext VK1:$src)), - (SUBREG_TO_REG (i64 0), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_32bit)>; def : Pat<(i16 (zext VK1:$src)), @@ -6188,27 +6190,33 @@ defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, let Predicates = [HasAVX512, NoVLX] in { def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))), (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr - (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; + (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), + VR256X:$src1, sub_ymm)))), sub_ymm)>; def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))), (EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr - (v16f32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>; + (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), + VR128X:$src1, sub_xmm)))), sub_xmm)>; def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))), (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr - (v8f64 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_xmm)>; + (v8f64 (INSERT_SUBREG (IMPLICIT_DEF), + VR256X:$src1, sub_ymm)))), sub_xmm)>; def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))), (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr - (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>; + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), + VR256X:$src1, sub_ymm)))), sub_ymm)>; def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))), (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr - (v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>; + (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), + VR128X:$src1, sub_xmm)))), sub_xmm)>; def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))), (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr - (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_ymm)>; + (v8i32 (INSERT_SUBREG (IMPLICIT_DEF), + VR128X:$src1, sub_xmm)))), sub_ymm)>; } let Predicates = [HasAVX512] in { @@ -6951,17 +6959,17 @@ defm VPMOVUSWB : avx512_trunc_sat_wb<0x10, "us", X86vtruncus>; let Predicates = [HasAVX512, NoVLX] in { def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))), (v8i16 (EXTRACT_SUBREG - (v16i16 (VPMOVDWZrr (v16i32 (SUBREG_TO_REG (i32 0), + (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src, sub_ymm)))), sub_xmm))>; def: Pat<(v4i32 (X86vtrunc (v4i64 VR256X:$src))), (v4i32 (EXTRACT_SUBREG - (v8i32 (VPMOVQDZrr (v8i64 (SUBREG_TO_REG (i32 0), + (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src, sub_ymm)))), sub_xmm))>; } let Predicates = [HasBWI, NoVLX] in { def: Pat<(v16i8 (X86vtrunc (v16i16 VR256X:$src))), - (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (SUBREG_TO_REG (i32 0), + (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src, sub_ymm))), sub_xmm))>; } diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index ebbed64977e..d02f8335196 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -8362,20 +8362,24 @@ let Predicates = [HasAVX2, NoVLX] in { let Predicates = [HasAVX2, NoVLX_Or_NoBWI], AddedComplexity = 20 in { def : Pat<(v16i8 (X86VBroadcast GR8:$src)), (VPBROADCASTBrr (COPY_TO_REGCLASS - (i32 (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)), + (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), + GR8:$src, sub_8bit)), VR128))>; def : Pat<(v32i8 (X86VBroadcast GR8:$src)), (VPBROADCASTBYrr (COPY_TO_REGCLASS - (i32 (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)), + (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), + GR8:$src, sub_8bit)), VR128))>; def : Pat<(v8i16 (X86VBroadcast GR16:$src)), (VPBROADCASTWrr (COPY_TO_REGCLASS - (i32 (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit)), + (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), + GR16:$src, sub_16bit)), VR128))>; def : Pat<(v16i16 (X86VBroadcast GR16:$src)), (VPBROADCASTWYrr (COPY_TO_REGCLASS - (i32 (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit)), + (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)), + GR16:$src, sub_16bit)), VR128))>; } let Predicates = [HasAVX2, NoVLX], AddedComplexity = 20 in { diff --git a/test/CodeGen/X86/avx512-fsel.ll b/test/CodeGen/X86/avx512-fsel.ll index 42469e54b55..2c9c42b84b9 100644 --- a/test/CodeGen/X86/avx512-fsel.ll +++ b/test/CodeGen/X86/avx512-fsel.ll @@ -17,10 +17,12 @@ define i32 @test(float %a, float %b) { ; CHECK-NEXT: setp %sil ; CHECK-NEXT: setne %dil ; CHECK-NEXT: andb %cl, %dl +; CHECK-NEXT: ## implicit-def: %R8D ; CHECK-NEXT: movb %dl, %r8b ; CHECK-NEXT: andl $1, %r8d ; CHECK-NEXT: kmovw %r8d, %k0 ; CHECK-NEXT: orb %sil, %dil +; CHECK-NEXT: ## implicit-def: %R8D ; CHECK-NEXT: movb %dil, %r8b ; CHECK-NEXT: andl $1, %r8d ; CHECK-NEXT: kmovw %r8d, %k1 diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll index 0e8e9f4212c..3c74aec55ba 100644 --- a/test/CodeGen/X86/avx512-intrinsics.ll +++ b/test/CodeGen/X86/avx512-intrinsics.ll @@ -1094,9 +1094,9 @@ define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) { ; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k2 ; CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovw %k3, %eax -; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ; CHECK-NEXT: kmovw %k4, %eax +; CHECK-NEXT: kmovw %k3, %ecx +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ; CHECK-NEXT: kmovw %k5, %eax ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 @@ -1142,9 +1142,9 @@ define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { ; CHECK-NEXT: vpcmpnltq %zmm1, %zmm0, %k2 {%k3} ; CHECK-NEXT: vpcmpnleq %zmm1, %zmm0, %k1 {%k3} ; CHECK-NEXT: vpcmpordq %zmm1, %zmm0, %k3 {%k3} -; CHECK-NEXT: kmovw %k4, %eax -; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ; CHECK-NEXT: kmovw %k5, %eax +; CHECK-NEXT: kmovw %k4, %ecx +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ; CHECK-NEXT: kmovw %k6, %eax ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 @@ -1191,9 +1191,9 @@ define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) { ; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k2 ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 ; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k0 -; CHECK-NEXT: kmovw %k3, %eax -; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ; CHECK-NEXT: kmovw %k4, %eax +; CHECK-NEXT: kmovw %k3, %ecx +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ; CHECK-NEXT: kmovw %k5, %eax ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 @@ -1239,9 +1239,9 @@ define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) { ; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k2 {%k3} ; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 {%k3} ; CHECK-NEXT: vpcmporduq %zmm1, %zmm0, %k3 {%k3} -; CHECK-NEXT: kmovw %k4, %eax -; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ; CHECK-NEXT: kmovw %k5, %eax +; CHECK-NEXT: kmovw %k4, %ecx +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ; CHECK-NEXT: kmovw %k6, %eax ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 diff --git a/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/test/CodeGen/X86/avx512bwvl-intrinsics.ll index 3c12018af9e..f0c4e6564af 100644 --- a/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -694,9 +694,9 @@ define <8 x i8> @test_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1) { ; CHECK-NEXT: vpcmpnltw %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd1,0x05] ; CHECK-NEXT: vpcmpnlew %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc9,0x06] ; CHECK-NEXT: vpcmpordw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc1,0x07] -; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] -; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x00] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] +; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb] +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc1,0x00] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x04] @@ -742,9 +742,9 @@ define <8 x i8> @test_mask_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) { ; CHECK-NEXT: vpcmpnltw %xmm1, %xmm0, %k2 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3f,0xd1,0x05] ; CHECK-NEXT: vpcmpnlew %xmm1, %xmm0, %k1 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3f,0xc9,0x06] ; CHECK-NEXT: vpcmpordw %xmm1, %xmm0, %k3 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3f,0xd9,0x07] -; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] -; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x00] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] +; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc] +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc1,0x00] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x04] @@ -791,9 +791,9 @@ define <8 x i8> @test_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1) { ; CHECK-NEXT: vpcmpnltuw %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xd1,0x05] ; CHECK-NEXT: vpcmpnleuw %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc9,0x06] ; CHECK-NEXT: vpcmporduw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc1,0x07] -; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] -; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x00] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] +; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb] +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc1,0x00] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x04] @@ -839,9 +839,9 @@ define <8 x i8> @test_mask_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) { ; CHECK-NEXT: vpcmpnltuw %xmm1, %xmm0, %k2 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3e,0xd1,0x05] ; CHECK-NEXT: vpcmpnleuw %xmm1, %xmm0, %k1 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3e,0xc9,0x06] ; CHECK-NEXT: vpcmporduw %xmm1, %xmm0, %k3 {%k3} ## encoding: [0x62,0xf3,0xfd,0x0b,0x3e,0xd9,0x07] -; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] -; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x00] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] +; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc] +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc1,0x00] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x04] diff --git a/test/CodeGen/X86/avx512dq-intrinsics.ll b/test/CodeGen/X86/avx512dq-intrinsics.ll index e684ad96d55..f9a22d89423 100644 --- a/test/CodeGen/X86/avx512dq-intrinsics.ll +++ b/test/CodeGen/X86/avx512dq-intrinsics.ll @@ -469,9 +469,9 @@ define i16@test_int_x86_avx512_mask_fpclass_ps_512(<16 x float> %x0, i16 %x1) { ; CHECK-LABEL: test_int_x86_avx512_mask_fpclass_ps_512: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: vfpclassps $4, %zmm0, %k0 {%k1} -; CHECK-NEXT: kmovw %k0, %ecx ; CHECK-NEXT: vfpclassps $4, %zmm0, %k0 +; CHECK-NEXT: kmovw %k0, %ecx +; CHECK-NEXT: vfpclassps $4, %zmm0, %k0 {%k1} ; CHECK-NEXT: kmovw %k0, %eax ; CHECK-NEXT: addl %ecx, %eax ; CHECK-NEXT: ## kill: %AX %AX %EAX diff --git a/test/CodeGen/X86/avx512vl-intrinsics.ll b/test/CodeGen/X86/avx512vl-intrinsics.ll index 5932e14d6fc..bf780e6f36f 100644 --- a/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -14,9 +14,9 @@ define <8 x i8> @test_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-NEXT: vpcmpnltd %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd1,0x05] ; CHECK-NEXT: vpcmpnled %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc9,0x06] ; CHECK-NEXT: vpcmpordd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xc1,0x07] -; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] -; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] +; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb] +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] @@ -62,9 +62,9 @@ define <8 x i8> @test_mask_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) { ; CHECK-NEXT: vpcmpnltd %ymm1, %ymm0, %k2 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1f,0xd1,0x05] ; CHECK-NEXT: vpcmpnled %ymm1, %ymm0, %k1 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1f,0xc9,0x06] ; CHECK-NEXT: vpcmpordd %ymm1, %ymm0, %k3 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1f,0xd9,0x07] -; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] -; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] +; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc] +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] @@ -111,9 +111,9 @@ define <8 x i8> @test_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1) { ; CHECK-NEXT: vpcmpnltud %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xd1,0x05] ; CHECK-NEXT: vpcmpnleud %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc9,0x06] ; CHECK-NEXT: vpcmpordud %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc1,0x07] -; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3] -; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] +; CHECK-NEXT: kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb] +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] @@ -159,9 +159,9 @@ define <8 x i8> @test_mask_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) { ; CHECK-NEXT: vpcmpnltud %ymm1, %ymm0, %k2 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1e,0xd1,0x05] ; CHECK-NEXT: vpcmpnleud %ymm1, %ymm0, %k1 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1e,0xc9,0x06] ; CHECK-NEXT: vpcmpordud %ymm1, %ymm0, %k3 {%k3} ## encoding: [0x62,0xf3,0x7d,0x2b,0x1e,0xd9,0x07] -; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] -; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] ; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] +; CHECK-NEXT: kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc] +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] ; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] @@ -200,19 +200,19 @@ declare i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounw define <8 x i8> @test_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-LABEL: test_cmp_q_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xf9,0x00] -; CHECK-NEXT: vpcmpltq %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xf1,0x01] -; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xe9,0x02] +; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xe9,0x00] +; CHECK-NEXT: vpcmpltq %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xf9,0x01] +; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xf1,0x02] ; CHECK-NEXT: vpcmpunordq %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xe1,0x03] ; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd9,0x04] ; CHECK-NEXT: vpcmpnltq %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd1,0x05] ; CHECK-NEXT: vpcmpnleq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc9,0x06] ; CHECK-NEXT: vpcmpordq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xc1,0x07] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] -; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] +; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] @@ -248,19 +248,19 @@ define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) { ; CHECK-LABEL: test_mask_cmp_q_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k7 ## encoding: [0xc5,0xf8,0x92,0xff] -; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1f,0xc1,0x00] -; CHECK-NEXT: vpcmpltq %ymm1, %ymm0, %k6 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1f,0xf1,0x01] -; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k5 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1f,0xe9,0x02] +; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k5 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1f,0xe9,0x00] +; CHECK-NEXT: vpcmpltq %ymm1, %ymm0, %k0 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1f,0xc1,0x01] +; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k6 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1f,0xf1,0x02] ; CHECK-NEXT: vpcmpunordq %ymm1, %ymm0, %k4 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1f,0xe1,0x03] ; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k3 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1f,0xd9,0x04] ; CHECK-NEXT: vpcmpnltq %ymm1, %ymm0, %k2 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1f,0xd1,0x05] ; CHECK-NEXT: vpcmpnleq %ymm1, %ymm0, %k1 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1f,0xc9,0x06] ; CHECK-NEXT: vpcmpordq %ymm1, %ymm0, %k7 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1f,0xf9,0x07] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] +; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] @@ -297,19 +297,19 @@ declare i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwi define <8 x i8> @test_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1) { ; CHECK-LABEL: test_ucmp_q_256: ; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpequq %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xf9,0x00] -; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xf1,0x01] -; CHECK-NEXT: vpcmpleuq %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe9,0x02] +; CHECK-NEXT: vpcmpequq %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe9,0x00] +; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xf9,0x01] +; CHECK-NEXT: vpcmpleuq %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xf1,0x02] ; CHECK-NEXT: vpcmpunorduq %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe1,0x03] ; CHECK-NEXT: vpcmpnequq %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xd9,0x04] ; CHECK-NEXT: vpcmpnltuq %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xd1,0x05] ; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc9,0x06] ; CHECK-NEXT: vpcmporduq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc1,0x07] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] -; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] +; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] @@ -345,19 +345,19 @@ define <8 x i8> @test_mask_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) { ; CHECK-LABEL: test_mask_ucmp_q_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k7 ## encoding: [0xc5,0xf8,0x92,0xff] -; CHECK-NEXT: vpcmpequq %ymm1, %ymm0, %k0 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1e,0xc1,0x00] -; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k6 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1e,0xf1,0x01] -; CHECK-NEXT: vpcmpleuq %ymm1, %ymm0, %k5 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1e,0xe9,0x02] +; CHECK-NEXT: vpcmpequq %ymm1, %ymm0, %k5 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1e,0xe9,0x00] +; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1e,0xc1,0x01] +; CHECK-NEXT: vpcmpleuq %ymm1, %ymm0, %k6 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1e,0xf1,0x02] ; CHECK-NEXT: vpcmpunorduq %ymm1, %ymm0, %k4 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1e,0xe1,0x03] ; CHECK-NEXT: vpcmpnequq %ymm1, %ymm0, %k3 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1e,0xd9,0x04] ; CHECK-NEXT: vpcmpnltuq %ymm1, %ymm0, %k2 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1e,0xd1,0x05] ; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k1 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1e,0xc9,0x06] ; CHECK-NEXT: vpcmporduq %ymm1, %ymm0, %k7 {%k7} ## encoding: [0x62,0xf3,0xfd,0x2f,0x1e,0xf9,0x07] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] +; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] @@ -396,19 +396,19 @@ declare i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounw define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: test_cmp_d_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xf9,0x00] -; CHECK-NEXT: vpcmpltd %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xf1,0x01] -; CHECK-NEXT: vpcmpled %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xe9,0x02] +; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xe9,0x00] +; CHECK-NEXT: vpcmpltd %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xf9,0x01] +; CHECK-NEXT: vpcmpled %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xf1,0x02] ; CHECK-NEXT: vpcmpunordd %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xe1,0x03] ; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd9,0x04] ; CHECK-NEXT: vpcmpnltd %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd1,0x05] ; CHECK-NEXT: vpcmpnled %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc9,0x06] ; CHECK-NEXT: vpcmpordd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xc1,0x07] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] -; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] +; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] @@ -444,19 +444,19 @@ define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) { ; CHECK-LABEL: test_mask_cmp_d_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k7 ## encoding: [0xc5,0xf8,0x92,0xff] -; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1f,0xc1,0x00] -; CHECK-NEXT: vpcmpltd %xmm1, %xmm0, %k6 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1f,0xf1,0x01] -; CHECK-NEXT: vpcmpled %xmm1, %xmm0, %k5 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1f,0xe9,0x02] +; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k5 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1f,0xe9,0x00] +; CHECK-NEXT: vpcmpltd %xmm1, %xmm0, %k0 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1f,0xc1,0x01] +; CHECK-NEXT: vpcmpled %xmm1, %xmm0, %k6 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1f,0xf1,0x02] ; CHECK-NEXT: vpcmpunordd %xmm1, %xmm0, %k4 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1f,0xe1,0x03] ; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k3 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1f,0xd9,0x04] ; CHECK-NEXT: vpcmpnltd %xmm1, %xmm0, %k2 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1f,0xd1,0x05] ; CHECK-NEXT: vpcmpnled %xmm1, %xmm0, %k1 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1f,0xc9,0x06] ; CHECK-NEXT: vpcmpordd %xmm1, %xmm0, %k7 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1f,0xf9,0x07] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] +; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] @@ -493,19 +493,19 @@ declare i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwi define <8 x i8> @test_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: test_ucmp_d_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpequd %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xf9,0x00] -; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xf1,0x01] -; CHECK-NEXT: vpcmpleud %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe9,0x02] +; CHECK-NEXT: vpcmpequd %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe9,0x00] +; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xf9,0x01] +; CHECK-NEXT: vpcmpleud %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xf1,0x02] ; CHECK-NEXT: vpcmpunordud %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe1,0x03] ; CHECK-NEXT: vpcmpnequd %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xd9,0x04] ; CHECK-NEXT: vpcmpnltud %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xd1,0x05] ; CHECK-NEXT: vpcmpnleud %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc9,0x06] ; CHECK-NEXT: vpcmpordud %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc1,0x07] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] -; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] +; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] @@ -541,19 +541,19 @@ define <8 x i8> @test_mask_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) { ; CHECK-LABEL: test_mask_ucmp_d_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k7 ## encoding: [0xc5,0xf8,0x92,0xff] -; CHECK-NEXT: vpcmpequd %xmm1, %xmm0, %k0 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1e,0xc1,0x00] -; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k6 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1e,0xf1,0x01] -; CHECK-NEXT: vpcmpleud %xmm1, %xmm0, %k5 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1e,0xe9,0x02] +; CHECK-NEXT: vpcmpequd %xmm1, %xmm0, %k5 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1e,0xe9,0x00] +; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1e,0xc1,0x01] +; CHECK-NEXT: vpcmpleud %xmm1, %xmm0, %k6 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1e,0xf1,0x02] ; CHECK-NEXT: vpcmpunordud %xmm1, %xmm0, %k4 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1e,0xe1,0x03] ; CHECK-NEXT: vpcmpnequd %xmm1, %xmm0, %k3 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1e,0xd9,0x04] ; CHECK-NEXT: vpcmpnltud %xmm1, %xmm0, %k2 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1e,0xd1,0x05] ; CHECK-NEXT: vpcmpnleud %xmm1, %xmm0, %k1 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1e,0xc9,0x06] ; CHECK-NEXT: vpcmpordud %xmm1, %xmm0, %k7 {%k7} ## encoding: [0x62,0xf3,0x7d,0x0f,0x1e,0xf9,0x07] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] +; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] @@ -590,19 +590,19 @@ declare i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounw define <8 x i8> @test_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1) { ; CHECK-LABEL: test_cmp_q_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xf9,0x00] -; CHECK-NEXT: vpcmpltq %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xf1,0x01] -; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xe9,0x02] +; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xe9,0x00] +; CHECK-NEXT: vpcmpltq %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xf9,0x01] +; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xf1,0x02] ; CHECK-NEXT: vpcmpunordq %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xe1,0x03] ; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd9,0x04] ; CHECK-NEXT: vpcmpnltq %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd1,0x05] ; CHECK-NEXT: vpcmpnleq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc9,0x06] ; CHECK-NEXT: vpcmpordq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xc1,0x07] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] -; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] +; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] @@ -638,19 +638,19 @@ define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) { ; CHECK-LABEL: test_mask_cmp_q_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k7 ## encoding: [0xc5,0xf8,0x92,0xff] -; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xc1,0x00] -; CHECK-NEXT: vpcmpltq %xmm1, %xmm0, %k6 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xf1,0x01] -; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k5 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xe9,0x02] +; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k5 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xe9,0x00] +; CHECK-NEXT: vpcmpltq %xmm1, %xmm0, %k0 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xc1,0x01] +; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k6 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xf1,0x02] ; CHECK-NEXT: vpcmpunordq %xmm1, %xmm0, %k4 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xe1,0x03] ; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k3 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xd9,0x04] ; CHECK-NEXT: vpcmpnltq %xmm1, %xmm0, %k2 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xd1,0x05] ; CHECK-NEXT: vpcmpnleq %xmm1, %xmm0, %k1 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xc9,0x06] ; CHECK-NEXT: vpcmpordq %xmm1, %xmm0, %k7 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1f,0xf9,0x07] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] +; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] @@ -687,19 +687,19 @@ declare i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwi define <8 x i8> @test_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1) { ; CHECK-LABEL: test_ucmp_q_128: ; CHECK: ## BB#0: -; CHECK-NEXT: vpcmpequq %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xf9,0x00] -; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xf1,0x01] -; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe9,0x02] +; CHECK-NEXT: vpcmpequq %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe9,0x00] +; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xf9,0x01] +; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xf1,0x02] ; CHECK-NEXT: vpcmpunorduq %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe1,0x03] ; CHECK-NEXT: vpcmpnequq %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xd9,0x04] ; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xd1,0x05] ; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc9,0x06] ; CHECK-NEXT: vpcmporduq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc1,0x07] ; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7] -; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] +; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] @@ -735,19 +735,19 @@ define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) { ; CHECK-LABEL: test_mask_ucmp_q_128: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k7 ## encoding: [0xc5,0xf8,0x92,0xff] -; CHECK-NEXT: vpcmpequq %xmm1, %xmm0, %k0 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xc1,0x00] -; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k6 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xf1,0x01] -; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k5 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xe9,0x02] +; CHECK-NEXT: vpcmpequq %xmm1, %xmm0, %k5 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xe9,0x00] +; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xc1,0x01] +; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k6 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xf1,0x02] ; CHECK-NEXT: vpcmpunorduq %xmm1, %xmm0, %k4 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xe1,0x03] ; CHECK-NEXT: vpcmpnequq %xmm1, %xmm0, %k3 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xd9,0x04] ; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k2 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xd1,0x05] ; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k1 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xc9,0x06] ; CHECK-NEXT: vpcmporduq %xmm1, %xmm0, %k7 {%k7} ## encoding: [0x62,0xf3,0xfd,0x0f,0x1e,0xf9,0x07] ; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0] -; CHECK-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x00] -; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] +; CHECK-NEXT: kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd] +; CHECK-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc1,0x00] ; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] -; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5] +; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6] ; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] ; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4] ; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] diff --git a/test/CodeGen/X86/pr27591.ll b/test/CodeGen/X86/pr27591.ll index 53a23b3c6b9..3331a9354fc 100644 --- a/test/CodeGen/X86/pr27591.ll +++ b/test/CodeGen/X86/pr27591.ll @@ -8,6 +8,7 @@ define void @test1(i32 %x) #0 { ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: testl %edi, %edi ; CHECK-NEXT: setne %al +; CHECK-NEXT: # implicit-def: %EDI ; CHECK-NEXT: movb %al, %dil ; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k0 @@ -29,6 +30,7 @@ define void @test2(i32 %x) #0 { ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: testl %edi, %edi ; CHECK-NEXT: setne %al +; CHECK-NEXT: # implicit-def: %EDI ; CHECK-NEXT: movb %al, %dil ; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: kmovw %edi, %k0 -- 2.50.1