From: Craig Topper Date: Tue, 28 Nov 2017 01:36:31 +0000 (+0000) Subject: [X86] Add command line without AVX512BW/AVX512VL to bitcast-int-to-vector-bool-zext.ll. X-Git-Url: https://granicus.if.org/sourcecode?a=commitdiff_plain;h=f058704792c89c06a02162200a933bdf61444fe2;p=llvm [X86] Add command line without AVX512BW/AVX512VL to bitcast-int-to-vector-bool-zext.ll. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@319135 91177308-0d34-0410-b5e6-96231b3b80d8 --- diff --git a/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll b/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll index 4ed55ac0919..356e9b4a5be 100644 --- a/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll +++ b/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll @@ -3,7 +3,8 @@ ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE2-SSSE3,SSSE3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX12,AVX1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX12,AVX2 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512,AVX512F +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512VLBW ; ; 128-bit vectors @@ -45,16 +46,27 @@ define <2 x i64> @ext_i2_2i64(i2 %a0) { ; AVX2-NEXT: vpsrlq $63, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: ext_i2_2i64: -; AVX512: # BB#0: -; AVX512-NEXT: andb $3, %dil -; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; AVX512-NEXT: kmovd %eax, %k1 -; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} -; AVX512-NEXT: # kill: %XMM0 %XMM0 %ZMM0 -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: ext_i2_2i64: +; AVX512F: # BB#0: +; AVX512F-NEXT: andb $3, %dil +; AVX512F-NEXT: movb %dil, -{{[0-9]+}}(%rsp) +; AVX512F-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} +; AVX512F-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VLBW-LABEL: ext_i2_2i64: +; AVX512VLBW: # BB#0: +; AVX512VLBW-NEXT: andb $3, %dil +; AVX512VLBW-NEXT: movb %dil, -{{[0-9]+}}(%rsp) +; AVX512VLBW-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; AVX512VLBW-NEXT: kmovd %eax, %k1 +; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} +; AVX512VLBW-NEXT: # kill: %XMM0 %XMM0 %ZMM0 +; AVX512VLBW-NEXT: vzeroupper +; AVX512VLBW-NEXT: retq %1 = bitcast i2 %a0 to <2 x i1> %2 = zext <2 x i1> %1 to <2 x i64> ret <2 x i64> %2 @@ -91,16 +103,28 @@ define <4 x i32> @ext_i4_4i32(i4 %a0) { ; AVX2-NEXT: vpsrld $31, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: ext_i4_4i32: -; AVX512: # BB#0: -; AVX512-NEXT: andb $15, %dil -; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; AVX512-NEXT: kmovd %eax, %k1 -; AVX512-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} -; AVX512-NEXT: # kill: %XMM0 %XMM0 %YMM0 -; AVX512-NEXT: vzeroupper -; AVX512-NEXT: retq +; AVX512F-LABEL: ext_i4_4i32: +; AVX512F: # BB#0: +; AVX512F-NEXT: andb $15, %dil +; AVX512F-NEXT: movb %dil, -{{[0-9]+}}(%rsp) +; AVX512F-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} +; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512F-NEXT: # kill: %XMM0 %XMM0 %YMM0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VLBW-LABEL: ext_i4_4i32: +; AVX512VLBW: # BB#0: +; AVX512VLBW-NEXT: andb $15, %dil +; AVX512VLBW-NEXT: movb %dil, -{{[0-9]+}}(%rsp) +; AVX512VLBW-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; AVX512VLBW-NEXT: kmovd %eax, %k1 +; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} +; AVX512VLBW-NEXT: # kill: %XMM0 %XMM0 %YMM0 +; AVX512VLBW-NEXT: vzeroupper +; AVX512VLBW-NEXT: retq %1 = bitcast i4 %a0 to <4 x i1> %2 = zext <4 x i1> %1 to <4 x i32> ret <4 x i32> %2 @@ -139,50 +163,95 @@ define <8 x i16> @ext_i8_8i16(i8 %a0) { ; AVX2-NEXT: vpsrlw $15, %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: ext_i8_8i16: -; AVX512: # BB#0: -; AVX512-NEXT: kmovd %edi, %k5 -; AVX512-NEXT: kshiftlw $8, %k5, %k0 -; AVX512-NEXT: kshiftrw $15, %k0, %k0 -; AVX512-NEXT: kshiftlw $9, %k5, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kshiftlw $10, %k5, %k2 -; AVX512-NEXT: kshiftrw $15, %k2, %k2 -; AVX512-NEXT: kshiftlw $11, %k5, %k3 -; AVX512-NEXT: kshiftrw $15, %k3, %k3 -; AVX512-NEXT: kshiftlw $12, %k5, %k4 -; AVX512-NEXT: kshiftrw $15, %k4, %k4 -; AVX512-NEXT: kshiftlw $13, %k5, %k6 -; AVX512-NEXT: kshiftrw $15, %k6, %k6 -; AVX512-NEXT: kshiftlw $15, %k5, %k7 -; AVX512-NEXT: kshiftrw $15, %k7, %k7 -; AVX512-NEXT: kshiftlw $14, %k5, %k5 -; AVX512-NEXT: kshiftrw $15, %k5, %k5 -; AVX512-NEXT: kmovd %k5, %eax -; AVX512-NEXT: andl $1, %eax -; AVX512-NEXT: kmovd %k7, %ecx -; AVX512-NEXT: andl $1, %ecx -; AVX512-NEXT: vmovd %ecx, %xmm0 -; AVX512-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 -; AVX512-NEXT: kmovd %k6, %eax -; AVX512-NEXT: andl $1, %eax -; AVX512-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 -; AVX512-NEXT: kmovd %k4, %eax -; AVX512-NEXT: andl $1, %eax -; AVX512-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 -; AVX512-NEXT: kmovd %k3, %eax -; AVX512-NEXT: andl $1, %eax -; AVX512-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; AVX512-NEXT: kmovd %k2, %eax -; AVX512-NEXT: andl $1, %eax -; AVX512-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 -; AVX512-NEXT: kmovd %k1, %eax -; AVX512-NEXT: andl $1, %eax -; AVX512-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 -; AVX512-NEXT: kmovd %k0, %eax -; AVX512-NEXT: andl $1, %eax -; AVX512-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: ext_i8_8i16: +; AVX512F: # BB#0: +; AVX512F-NEXT: kmovw %edi, %k5 +; AVX512F-NEXT: kshiftlw $8, %k5, %k0 +; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kshiftlw $9, %k5, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kshiftlw $10, %k5, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kshiftlw $11, %k5, %k3 +; AVX512F-NEXT: kshiftrw $15, %k3, %k3 +; AVX512F-NEXT: kshiftlw $12, %k5, %k4 +; AVX512F-NEXT: kshiftrw $15, %k4, %k4 +; AVX512F-NEXT: kshiftlw $13, %k5, %k6 +; AVX512F-NEXT: kshiftrw $15, %k6, %k6 +; AVX512F-NEXT: kshiftlw $15, %k5, %k7 +; AVX512F-NEXT: kshiftrw $15, %k7, %k7 +; AVX512F-NEXT: kshiftlw $14, %k5, %k5 +; AVX512F-NEXT: kshiftrw $15, %k5, %k5 +; AVX512F-NEXT: kmovw %k5, %eax +; AVX512F-NEXT: andl $1, %eax +; AVX512F-NEXT: kmovw %k7, %ecx +; AVX512F-NEXT: andl $1, %ecx +; AVX512F-NEXT: vmovd %ecx, %xmm0 +; AVX512F-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kmovw %k6, %eax +; AVX512F-NEXT: andl $1, %eax +; AVX512F-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kmovw %k4, %eax +; AVX512F-NEXT: andl $1, %eax +; AVX512F-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kmovw %k3, %eax +; AVX512F-NEXT: andl $1, %eax +; AVX512F-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: andl $1, %eax +; AVX512F-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: andl $1, %eax +; AVX512F-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: andl $1, %eax +; AVX512F-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VLBW-LABEL: ext_i8_8i16: +; AVX512VLBW: # BB#0: +; AVX512VLBW-NEXT: kmovd %edi, %k5 +; AVX512VLBW-NEXT: kshiftlw $8, %k5, %k0 +; AVX512VLBW-NEXT: kshiftrw $15, %k0, %k0 +; AVX512VLBW-NEXT: kshiftlw $9, %k5, %k1 +; AVX512VLBW-NEXT: kshiftrw $15, %k1, %k1 +; AVX512VLBW-NEXT: kshiftlw $10, %k5, %k2 +; AVX512VLBW-NEXT: kshiftrw $15, %k2, %k2 +; AVX512VLBW-NEXT: kshiftlw $11, %k5, %k3 +; AVX512VLBW-NEXT: kshiftrw $15, %k3, %k3 +; AVX512VLBW-NEXT: kshiftlw $12, %k5, %k4 +; AVX512VLBW-NEXT: kshiftrw $15, %k4, %k4 +; AVX512VLBW-NEXT: kshiftlw $13, %k5, %k6 +; AVX512VLBW-NEXT: kshiftrw $15, %k6, %k6 +; AVX512VLBW-NEXT: kshiftlw $15, %k5, %k7 +; AVX512VLBW-NEXT: kshiftrw $15, %k7, %k7 +; AVX512VLBW-NEXT: kshiftlw $14, %k5, %k5 +; AVX512VLBW-NEXT: kshiftrw $15, %k5, %k5 +; AVX512VLBW-NEXT: kmovd %k5, %eax +; AVX512VLBW-NEXT: andl $1, %eax +; AVX512VLBW-NEXT: kmovd %k7, %ecx +; AVX512VLBW-NEXT: andl $1, %ecx +; AVX512VLBW-NEXT: vmovd %ecx, %xmm0 +; AVX512VLBW-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 +; AVX512VLBW-NEXT: kmovd %k6, %eax +; AVX512VLBW-NEXT: andl $1, %eax +; AVX512VLBW-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 +; AVX512VLBW-NEXT: kmovd %k4, %eax +; AVX512VLBW-NEXT: andl $1, %eax +; AVX512VLBW-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 +; AVX512VLBW-NEXT: kmovd %k3, %eax +; AVX512VLBW-NEXT: andl $1, %eax +; AVX512VLBW-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 +; AVX512VLBW-NEXT: kmovd %k2, %eax +; AVX512VLBW-NEXT: andl $1, %eax +; AVX512VLBW-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 +; AVX512VLBW-NEXT: kmovd %k1, %eax +; AVX512VLBW-NEXT: andl $1, %eax +; AVX512VLBW-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 +; AVX512VLBW-NEXT: kmovd %k0, %eax +; AVX512VLBW-NEXT: andl $1, %eax +; AVX512VLBW-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 +; AVX512VLBW-NEXT: retq %1 = bitcast i8 %a0 to <8 x i1> %2 = zext <8 x i1> %1 to <8 x i16> ret <8 x i16> %2 @@ -235,98 +304,191 @@ define <16 x i8> @ext_i16_16i8(i16 %a0) { ; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: ext_i16_16i8: -; AVX512: # BB#0: -; AVX512-NEXT: pushq %rbp -; AVX512-NEXT: .cfi_def_cfa_offset 16 -; AVX512-NEXT: pushq %r15 -; AVX512-NEXT: .cfi_def_cfa_offset 24 -; AVX512-NEXT: pushq %r14 -; AVX512-NEXT: .cfi_def_cfa_offset 32 -; AVX512-NEXT: pushq %r13 -; AVX512-NEXT: .cfi_def_cfa_offset 40 -; AVX512-NEXT: pushq %r12 -; AVX512-NEXT: .cfi_def_cfa_offset 48 -; AVX512-NEXT: pushq %rbx -; AVX512-NEXT: .cfi_def_cfa_offset 56 -; AVX512-NEXT: .cfi_offset %rbx, -56 -; AVX512-NEXT: .cfi_offset %r12, -48 -; AVX512-NEXT: .cfi_offset %r13, -40 -; AVX512-NEXT: .cfi_offset %r14, -32 -; AVX512-NEXT: .cfi_offset %r15, -24 -; AVX512-NEXT: .cfi_offset %rbp, -16 -; AVX512-NEXT: kmovd %edi, %k0 -; AVX512-NEXT: kshiftlw $14, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovd %k1, %r8d -; AVX512-NEXT: kshiftlw $15, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovd %k1, %r9d -; AVX512-NEXT: kshiftlw $13, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovd %k1, %r10d -; AVX512-NEXT: kshiftlw $12, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovd %k1, %r11d -; AVX512-NEXT: kshiftlw $11, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovd %k1, %r14d -; AVX512-NEXT: kshiftlw $10, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovd %k1, %r15d -; AVX512-NEXT: kshiftlw $9, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovd %k1, %r12d -; AVX512-NEXT: kshiftlw $8, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovd %k1, %r13d -; AVX512-NEXT: kshiftlw $7, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovd %k1, %esi -; AVX512-NEXT: kshiftlw $6, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovd %k1, %ebx -; AVX512-NEXT: kshiftlw $5, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovd %k1, %ebp -; AVX512-NEXT: kshiftlw $4, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovd %k1, %edi -; AVX512-NEXT: kshiftlw $3, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovd %k1, %eax -; AVX512-NEXT: kshiftlw $2, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovd %k1, %ecx -; AVX512-NEXT: kshiftlw $1, %k0, %k1 -; AVX512-NEXT: kshiftrw $15, %k1, %k1 -; AVX512-NEXT: kmovd %k1, %edx -; AVX512-NEXT: kshiftrw $15, %k0, %k0 -; AVX512-NEXT: vmovd %r9d, %xmm0 -; AVX512-NEXT: kmovd %k0, %r9d -; AVX512-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 -; AVX512-NEXT: vpinsrb $2, %r10d, %xmm0, %xmm0 -; AVX512-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 -; AVX512-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 -; AVX512-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 -; AVX512-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 -; AVX512-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 -; AVX512-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 -; AVX512-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 -; AVX512-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0 -; AVX512-NEXT: vpinsrb $11, %edi, %xmm0, %xmm0 -; AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; AVX512-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 -; AVX512-NEXT: vpinsrb $14, %edx, %xmm0, %xmm0 -; AVX512-NEXT: vpinsrb $15, %r9d, %xmm0, %xmm0 -; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 -; AVX512-NEXT: popq %rbx -; AVX512-NEXT: popq %r12 -; AVX512-NEXT: popq %r13 -; AVX512-NEXT: popq %r14 -; AVX512-NEXT: popq %r15 -; AVX512-NEXT: popq %rbp -; AVX512-NEXT: retq +; AVX512F-LABEL: ext_i16_16i8: +; AVX512F: # BB#0: +; AVX512F-NEXT: pushq %rbp +; AVX512F-NEXT: .cfi_def_cfa_offset 16 +; AVX512F-NEXT: pushq %r15 +; AVX512F-NEXT: .cfi_def_cfa_offset 24 +; AVX512F-NEXT: pushq %r14 +; AVX512F-NEXT: .cfi_def_cfa_offset 32 +; AVX512F-NEXT: pushq %r13 +; AVX512F-NEXT: .cfi_def_cfa_offset 40 +; AVX512F-NEXT: pushq %r12 +; AVX512F-NEXT: .cfi_def_cfa_offset 48 +; AVX512F-NEXT: pushq %rbx +; AVX512F-NEXT: .cfi_def_cfa_offset 56 +; AVX512F-NEXT: .cfi_offset %rbx, -56 +; AVX512F-NEXT: .cfi_offset %r12, -48 +; AVX512F-NEXT: .cfi_offset %r13, -40 +; AVX512F-NEXT: .cfi_offset %r14, -32 +; AVX512F-NEXT: .cfi_offset %r15, -24 +; AVX512F-NEXT: .cfi_offset %rbp, -16 +; AVX512F-NEXT: kmovw %edi, %k0 +; AVX512F-NEXT: kshiftlw $14, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %r8d +; AVX512F-NEXT: kshiftlw $15, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %r9d +; AVX512F-NEXT: kshiftlw $13, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %r10d +; AVX512F-NEXT: kshiftlw $12, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %r11d +; AVX512F-NEXT: kshiftlw $11, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %r14d +; AVX512F-NEXT: kshiftlw $10, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %r15d +; AVX512F-NEXT: kshiftlw $9, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %r12d +; AVX512F-NEXT: kshiftlw $8, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %r13d +; AVX512F-NEXT: kshiftlw $7, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %esi +; AVX512F-NEXT: kshiftlw $6, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %ebx +; AVX512F-NEXT: kshiftlw $5, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %ebp +; AVX512F-NEXT: kshiftlw $4, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %edi +; AVX512F-NEXT: kshiftlw $3, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: kshiftlw $2, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %ecx +; AVX512F-NEXT: kshiftlw $1, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %edx +; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: vmovd %r9d, %xmm0 +; AVX512F-NEXT: kmovw %k0, %r9d +; AVX512F-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $2, %r10d, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $11, %edi, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $14, %edx, %xmm0, %xmm0 +; AVX512F-NEXT: vpinsrb $15, %r9d, %xmm0, %xmm0 +; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: popq %rbx +; AVX512F-NEXT: popq %r12 +; AVX512F-NEXT: popq %r13 +; AVX512F-NEXT: popq %r14 +; AVX512F-NEXT: popq %r15 +; AVX512F-NEXT: popq %rbp +; AVX512F-NEXT: retq +; +; AVX512VLBW-LABEL: ext_i16_16i8: +; AVX512VLBW: # BB#0: +; AVX512VLBW-NEXT: pushq %rbp +; AVX512VLBW-NEXT: .cfi_def_cfa_offset 16 +; AVX512VLBW-NEXT: pushq %r15 +; AVX512VLBW-NEXT: .cfi_def_cfa_offset 24 +; AVX512VLBW-NEXT: pushq %r14 +; AVX512VLBW-NEXT: .cfi_def_cfa_offset 32 +; AVX512VLBW-NEXT: pushq %r13 +; AVX512VLBW-NEXT: .cfi_def_cfa_offset 40 +; AVX512VLBW-NEXT: pushq %r12 +; AVX512VLBW-NEXT: .cfi_def_cfa_offset 48 +; AVX512VLBW-NEXT: pushq %rbx +; AVX512VLBW-NEXT: .cfi_def_cfa_offset 56 +; AVX512VLBW-NEXT: .cfi_offset %rbx, -56 +; AVX512VLBW-NEXT: .cfi_offset %r12, -48 +; AVX512VLBW-NEXT: .cfi_offset %r13, -40 +; AVX512VLBW-NEXT: .cfi_offset %r14, -32 +; AVX512VLBW-NEXT: .cfi_offset %r15, -24 +; AVX512VLBW-NEXT: .cfi_offset %rbp, -16 +; AVX512VLBW-NEXT: kmovd %edi, %k0 +; AVX512VLBW-NEXT: kshiftlw $14, %k0, %k1 +; AVX512VLBW-NEXT: kshiftrw $15, %k1, %k1 +; AVX512VLBW-NEXT: kmovd %k1, %r8d +; AVX512VLBW-NEXT: kshiftlw $15, %k0, %k1 +; AVX512VLBW-NEXT: kshiftrw $15, %k1, %k1 +; AVX512VLBW-NEXT: kmovd %k1, %r9d +; AVX512VLBW-NEXT: kshiftlw $13, %k0, %k1 +; AVX512VLBW-NEXT: kshiftrw $15, %k1, %k1 +; AVX512VLBW-NEXT: kmovd %k1, %r10d +; AVX512VLBW-NEXT: kshiftlw $12, %k0, %k1 +; AVX512VLBW-NEXT: kshiftrw $15, %k1, %k1 +; AVX512VLBW-NEXT: kmovd %k1, %r11d +; AVX512VLBW-NEXT: kshiftlw $11, %k0, %k1 +; AVX512VLBW-NEXT: kshiftrw $15, %k1, %k1 +; AVX512VLBW-NEXT: kmovd %k1, %r14d +; AVX512VLBW-NEXT: kshiftlw $10, %k0, %k1 +; AVX512VLBW-NEXT: kshiftrw $15, %k1, %k1 +; AVX512VLBW-NEXT: kmovd %k1, %r15d +; AVX512VLBW-NEXT: kshiftlw $9, %k0, %k1 +; AVX512VLBW-NEXT: kshiftrw $15, %k1, %k1 +; AVX512VLBW-NEXT: kmovd %k1, %r12d +; AVX512VLBW-NEXT: kshiftlw $8, %k0, %k1 +; AVX512VLBW-NEXT: kshiftrw $15, %k1, %k1 +; AVX512VLBW-NEXT: kmovd %k1, %r13d +; AVX512VLBW-NEXT: kshiftlw $7, %k0, %k1 +; AVX512VLBW-NEXT: kshiftrw $15, %k1, %k1 +; AVX512VLBW-NEXT: kmovd %k1, %esi +; AVX512VLBW-NEXT: kshiftlw $6, %k0, %k1 +; AVX512VLBW-NEXT: kshiftrw $15, %k1, %k1 +; AVX512VLBW-NEXT: kmovd %k1, %ebx +; AVX512VLBW-NEXT: kshiftlw $5, %k0, %k1 +; AVX512VLBW-NEXT: kshiftrw $15, %k1, %k1 +; AVX512VLBW-NEXT: kmovd %k1, %ebp +; AVX512VLBW-NEXT: kshiftlw $4, %k0, %k1 +; AVX512VLBW-NEXT: kshiftrw $15, %k1, %k1 +; AVX512VLBW-NEXT: kmovd %k1, %edi +; AVX512VLBW-NEXT: kshiftlw $3, %k0, %k1 +; AVX512VLBW-NEXT: kshiftrw $15, %k1, %k1 +; AVX512VLBW-NEXT: kmovd %k1, %eax +; AVX512VLBW-NEXT: kshiftlw $2, %k0, %k1 +; AVX512VLBW-NEXT: kshiftrw $15, %k1, %k1 +; AVX512VLBW-NEXT: kmovd %k1, %ecx +; AVX512VLBW-NEXT: kshiftlw $1, %k0, %k1 +; AVX512VLBW-NEXT: kshiftrw $15, %k1, %k1 +; AVX512VLBW-NEXT: kmovd %k1, %edx +; AVX512VLBW-NEXT: kshiftrw $15, %k0, %k0 +; AVX512VLBW-NEXT: vmovd %r9d, %xmm0 +; AVX512VLBW-NEXT: kmovd %k0, %r9d +; AVX512VLBW-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpinsrb $2, %r10d, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpinsrb $10, %ebp, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpinsrb $11, %edi, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpinsrb $14, %edx, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpinsrb $15, %r9d, %xmm0, %xmm0 +; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX512VLBW-NEXT: popq %rbx +; AVX512VLBW-NEXT: popq %r12 +; AVX512VLBW-NEXT: popq %r13 +; AVX512VLBW-NEXT: popq %r14 +; AVX512VLBW-NEXT: popq %r15 +; AVX512VLBW-NEXT: popq %rbp +; AVX512VLBW-NEXT: retq %1 = bitcast i16 %a0 to <16 x i1> %2 = zext <16 x i1> %1 to <16 x i8> ret <16 x i8> %2 @@ -387,15 +549,25 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) { ; AVX2-NEXT: vpsrlq $63, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: ext_i4_4i64: -; AVX512: # BB#0: -; AVX512-NEXT: andb $15, %dil -; AVX512-NEXT: movb %dil, -{{[0-9]+}}(%rsp) -; AVX512-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax -; AVX512-NEXT: kmovd %eax, %k1 -; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} -; AVX512-NEXT: # kill: %YMM0 %YMM0 %ZMM0 -; AVX512-NEXT: retq +; AVX512F-LABEL: ext_i4_4i64: +; AVX512F: # BB#0: +; AVX512F-NEXT: andb $15, %dil +; AVX512F-NEXT: movb %dil, -{{[0-9]+}}(%rsp) +; AVX512F-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; AVX512F-NEXT: kmovw %eax, %k1 +; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} +; AVX512F-NEXT: # kill: %YMM0 %YMM0 %ZMM0 +; AVX512F-NEXT: retq +; +; AVX512VLBW-LABEL: ext_i4_4i64: +; AVX512VLBW: # BB#0: +; AVX512VLBW-NEXT: andb $15, %dil +; AVX512VLBW-NEXT: movb %dil, -{{[0-9]+}}(%rsp) +; AVX512VLBW-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax +; AVX512VLBW-NEXT: kmovd %eax, %k1 +; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} +; AVX512VLBW-NEXT: # kill: %YMM0 %YMM0 %ZMM0 +; AVX512VLBW-NEXT: retq %1 = bitcast i4 %a0 to <4 x i1> %2 = zext <4 x i1> %1 to <4 x i64> ret <4 x i64> %2 @@ -445,11 +617,18 @@ define <8 x i32> @ext_i8_8i32(i8 %a0) { ; AVX2-NEXT: vpsrld $31, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: ext_i8_8i32: -; AVX512: # BB#0: -; AVX512-NEXT: kmovd %edi, %k1 -; AVX512-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} -; AVX512-NEXT: retq +; AVX512F-LABEL: ext_i8_8i32: +; AVX512F: # BB#0: +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} +; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VLBW-LABEL: ext_i8_8i32: +; AVX512VLBW: # BB#0: +; AVX512VLBW-NEXT: kmovd %edi, %k1 +; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z} +; AVX512VLBW-NEXT: retq %1 = bitcast i8 %a0 to <8 x i1> %2 = zext <8 x i1> %1 to <8 x i32> ret <8 x i32> %2 @@ -501,11 +680,18 @@ define <16 x i16> @ext_i16_16i16(i16 %a0) { ; AVX2-NEXT: vpsrlw $15, %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: ext_i16_16i16: -; AVX512: # BB#0: -; AVX512-NEXT: kmovd %edi, %k1 -; AVX512-NEXT: vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z} -; AVX512-NEXT: retq +; AVX512F-LABEL: ext_i16_16i16: +; AVX512F: # BB#0: +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} +; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VLBW-LABEL: ext_i16_16i16: +; AVX512VLBW: # BB#0: +; AVX512VLBW-NEXT: kmovd %edi, %k1 +; AVX512VLBW-NEXT: vmovdqu16 {{.*}}(%rip), %ymm0 {%k1} {z} +; AVX512VLBW-NEXT: retq %1 = bitcast i16 %a0 to <16 x i1> %2 = zext <16 x i1> %1 to <16 x i16> ret <16 x i16> %2 @@ -573,11 +759,155 @@ define <32 x i8> @ext_i32_32i8(i32 %a0) { ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512-LABEL: ext_i32_32i8: -; AVX512: # BB#0: -; AVX512-NEXT: kmovd %edi, %k1 -; AVX512-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} -; AVX512-NEXT: retq +; AVX512F-LABEL: ext_i32_32i8: +; AVX512F: # BB#0: +; AVX512F-NEXT: pushq %rbp +; AVX512F-NEXT: .cfi_def_cfa_offset 16 +; AVX512F-NEXT: .cfi_offset %rbp, -16 +; AVX512F-NEXT: movq %rsp, %rbp +; AVX512F-NEXT: .cfi_def_cfa_register %rbp +; AVX512F-NEXT: andq $-32, %rsp +; AVX512F-NEXT: subq $32, %rsp +; AVX512F-NEXT: movl %edi, (%rsp) +; AVX512F-NEXT: kmovw (%rsp), %k0 +; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 +; AVX512F-NEXT: kshiftlw $14, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: kshiftlw $15, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %ecx +; AVX512F-NEXT: vmovd %ecx, %xmm0 +; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $13, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $12, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $11, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $10, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $9, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $8, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $7, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $6, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $5, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $4, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $3, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $2, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $1, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $14, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: kshiftlw $15, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %ecx +; AVX512F-NEXT: vmovd %ecx, %xmm1 +; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $13, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $12, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $11, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $10, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $9, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $8, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $7, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $6, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $5, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $4, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $3, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $2, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $1, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; AVX512F-NEXT: movq %rbp, %rsp +; AVX512F-NEXT: popq %rbp +; AVX512F-NEXT: retq +; +; AVX512VLBW-LABEL: ext_i32_32i8: +; AVX512VLBW: # BB#0: +; AVX512VLBW-NEXT: kmovd %edi, %k1 +; AVX512VLBW-NEXT: vmovdqu8 {{.*}}(%rip), %ymm0 {%k1} {z} +; AVX512VLBW-NEXT: retq %1 = bitcast i32 %a0 to <32 x i1> %2 = zext <32 x i1> %1 to <32 x i8> ret <32 x i8> %2 @@ -665,11 +995,17 @@ define <8 x i64> @ext_i8_8i64(i8 %a0) { ; AVX2-NEXT: vpsrlq $63, %ymm1, %ymm1 ; AVX2-NEXT: retq ; -; AVX512-LABEL: ext_i8_8i64: -; AVX512: # BB#0: -; AVX512-NEXT: kmovd %edi, %k1 -; AVX512-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} -; AVX512-NEXT: retq +; AVX512F-LABEL: ext_i8_8i64: +; AVX512F: # BB#0: +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} +; AVX512F-NEXT: retq +; +; AVX512VLBW-LABEL: ext_i8_8i64: +; AVX512VLBW: # BB#0: +; AVX512VLBW-NEXT: kmovd %edi, %k1 +; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z} +; AVX512VLBW-NEXT: retq %1 = bitcast i8 %a0 to <8 x i1> %2 = zext <8 x i1> %1 to <8 x i64> ret <8 x i64> %2 @@ -742,11 +1078,17 @@ define <16 x i32> @ext_i16_16i32(i16 %a0) { ; AVX2-NEXT: vpsrld $31, %ymm1, %ymm1 ; AVX2-NEXT: retq ; -; AVX512-LABEL: ext_i16_16i32: -; AVX512: # BB#0: -; AVX512-NEXT: kmovd %edi, %k1 -; AVX512-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} -; AVX512-NEXT: retq +; AVX512F-LABEL: ext_i16_16i32: +; AVX512F: # BB#0: +; AVX512F-NEXT: kmovw %edi, %k1 +; AVX512F-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} +; AVX512F-NEXT: retq +; +; AVX512VLBW-LABEL: ext_i16_16i32: +; AVX512VLBW: # BB#0: +; AVX512VLBW-NEXT: kmovd %edi, %k1 +; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z} +; AVX512VLBW-NEXT: retq %1 = bitcast i16 %a0 to <16 x i1> %2 = zext <16 x i1> %1 to <16 x i32> ret <16 x i32> %2 @@ -826,11 +1168,36 @@ define <32 x i16> @ext_i32_32i16(i32 %a0) { ; AVX2-NEXT: vpsrlw $15, %ymm1, %ymm1 ; AVX2-NEXT: retq ; -; AVX512-LABEL: ext_i32_32i16: -; AVX512: # BB#0: -; AVX512-NEXT: kmovd %edi, %k1 -; AVX512-NEXT: vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z} -; AVX512-NEXT: retq +; AVX512F-LABEL: ext_i32_32i16: +; AVX512F: # BB#0: +; AVX512F-NEXT: pushq %rbp +; AVX512F-NEXT: .cfi_def_cfa_offset 16 +; AVX512F-NEXT: .cfi_offset %rbp, -16 +; AVX512F-NEXT: movq %rsp, %rbp +; AVX512F-NEXT: .cfi_def_cfa_register %rbp +; AVX512F-NEXT: andq $-32, %rsp +; AVX512F-NEXT: subq $32, %rsp +; AVX512F-NEXT: movl %edi, (%rsp) +; AVX512F-NEXT: kmovw (%rsp), %k1 +; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} +; AVX512F-NEXT: vpmovdb %zmm0, %xmm1 +; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero +; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero +; AVX512F-NEXT: movq %rbp, %rsp +; AVX512F-NEXT: popq %rbp +; AVX512F-NEXT: retq +; +; AVX512VLBW-LABEL: ext_i32_32i16: +; AVX512VLBW: # BB#0: +; AVX512VLBW-NEXT: kmovd %edi, %k1 +; AVX512VLBW-NEXT: vmovdqu16 {{.*}}(%rip), %zmm0 {%k1} {z} +; AVX512VLBW-NEXT: retq %1 = bitcast i32 %a0 to <32 x i1> %2 = zext <32 x i1> %1 to <32 x i16> ret <32 x i16> %2 @@ -937,11 +1304,288 @@ define <64 x i8> @ext_i64_64i8(i64 %a0) { ; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1 ; AVX2-NEXT: retq ; -; AVX512-LABEL: ext_i64_64i8: -; AVX512: # BB#0: -; AVX512-NEXT: kmovq %rdi, %k1 -; AVX512-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} -; AVX512-NEXT: retq +; AVX512F-LABEL: ext_i64_64i8: +; AVX512F: # BB#0: +; AVX512F-NEXT: pushq %rbp +; AVX512F-NEXT: .cfi_def_cfa_offset 16 +; AVX512F-NEXT: .cfi_offset %rbp, -16 +; AVX512F-NEXT: movq %rsp, %rbp +; AVX512F-NEXT: .cfi_def_cfa_register %rbp +; AVX512F-NEXT: andq $-32, %rsp +; AVX512F-NEXT: subq $64, %rsp +; AVX512F-NEXT: movl %edi, (%rsp) +; AVX512F-NEXT: shrq $32, %rdi +; AVX512F-NEXT: movl %edi, {{[0-9]+}}(%rsp) +; AVX512F-NEXT: kmovw (%rsp), %k0 +; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 +; AVX512F-NEXT: kshiftlw $14, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: kshiftlw $15, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %ecx +; AVX512F-NEXT: vmovd %ecx, %xmm0 +; AVX512F-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $13, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $12, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $11, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $10, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $9, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $8, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $7, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $6, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $5, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $4, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $3, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $2, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $1, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 +; AVX512F-NEXT: kshiftlw $14, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: kshiftlw $15, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %ecx +; AVX512F-NEXT: vmovd %ecx, %xmm1 +; AVX512F-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $13, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $12, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $11, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $10, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $9, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $8, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $7, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $6, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $5, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $4, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $3, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $2, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftlw $1, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 +; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 +; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k0 +; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 +; AVX512F-NEXT: kshiftlw $14, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: kshiftlw $15, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %ecx +; AVX512F-NEXT: vmovd %ecx, %xmm2 +; AVX512F-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $13, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $12, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $11, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $10, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $9, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $8, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $7, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $6, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $5, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $4, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $3, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $2, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $1, %k1, %k2 +; AVX512F-NEXT: kshiftrw $15, %k2, %k2 +; AVX512F-NEXT: kmovw %k2, %eax +; AVX512F-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 +; AVX512F-NEXT: kshiftlw $14, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: kshiftlw $15, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %ecx +; AVX512F-NEXT: vmovd %ecx, %xmm3 +; AVX512F-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $13, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $12, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $11, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $10, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $9, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $8, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $7, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $6, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $5, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $4, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $3, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $2, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftlw $1, %k0, %k1 +; AVX512F-NEXT: kshiftrw $15, %k1, %k1 +; AVX512F-NEXT: kmovw %k1, %eax +; AVX512F-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: kshiftrw $15, %k0, %k0 +; AVX512F-NEXT: kmovw %k0, %eax +; AVX512F-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 +; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2 +; AVX512F-NEXT: vpand %ymm1, %ymm2, %ymm1 +; AVX512F-NEXT: movq %rbp, %rsp +; AVX512F-NEXT: popq %rbp +; AVX512F-NEXT: retq +; +; AVX512VLBW-LABEL: ext_i64_64i8: +; AVX512VLBW: # BB#0: +; AVX512VLBW-NEXT: kmovq %rdi, %k1 +; AVX512VLBW-NEXT: vmovdqu8 {{.*}}(%rip), %zmm0 {%k1} {z} +; AVX512VLBW-NEXT: retq %1 = bitcast i64 %a0 to <64 x i1> %2 = zext <64 x i1> %1 to <64 x i8> ret <64 x i8> %2